sqlglot.parser
from __future__ import annotations

import itertools
import logging
import re
import typing as t
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, TokenError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
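
# A hedged sketch of how these builders behave (illustrative only; the shapes
# follow directly from the code above):
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import build_like
#     >>> # LIKE(pattern, value) puts the *second* argument on the left-hand side
#     >>> build_like([exp.column("pattern"), exp.column("value")]).sql()
#     'value LIKE pattern'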
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass
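
# A small sketch of build_mod's operand wrapping (illustrative):
#
#     >>> from sqlglot import exp
#     >>> from sqlglot.parser import build_mod
#     >>> build_mod([exp.column("a") + exp.Literal.number(1), exp.Literal.number(7)]).sql()
#     '(a + 1) % 7'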
class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
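
    # Because COALESCE, IFNULL and NVL all map to build_coalesce above, they
    # normalize to the same AST node (a hedged sketch):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT IFNULL(a, b)").find(exp.Coalesce) is not None
    #     True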
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
        TokenType.SESSION,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)
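
    # Membership in ID_VAR_TOKENS is what lets many keywords double as plain
    # names (a hedged sketch; FORMAT is in the set above):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("SELECT format FROM t").find(exp.Column).name
    #     'format'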
    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.UTC_DATE,
        TokenType.UTC_TIME,
        TokenType.UTC_TIMESTAMP,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()
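
    # The operator tables above drive a layered precedence climb: FACTOR binds
    # tighter than TERM, which binds tighter than COMPARISON (illustrative):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> lt = sqlglot.parse_one("SELECT a + b * c < d").find(exp.LT)
    #     >>> isinstance(lt.this, exp.Add) and isinstance(lt.this.expression, exp.Mul)
    #     True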
    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    CAST_COLUMN_OPERATORS = {
        TokenType.DOTCOLON,
        TokenType.DCOLON,
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
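
    # The STATEMENT_PARSERS table below dispatches on a statement's first token;
    # anything unmatched falls through to expression parsing (a sketch):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> isinstance(sqlglot.parse_one("UPDATE t SET x = 1"), exp.Update)
    #     True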
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.REVOKE: lambda self: self._parse_revoke(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
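
    # RANGE_PARSERS above handles postfix predicates such as BETWEEN and IN
    # (illustrative):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> b = sqlglot.parse_one("SELECT x BETWEEN 1 AND 10").find(exp.Between)
    #     >>> (b.args["low"].sql(), b.args["high"].sql())
    #     ('1', '10')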
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
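
    # PROPERTY_PARSERS turns trailing DDL clauses into typed Property nodes
    # (a hedged sketch using the MySQL dialect):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> ddl = sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     >>> ddl.find(exp.EngineProperty) is not None
    #     True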
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If the keyword isn't followed by parentheses, we should instead parse it as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
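
    # NO_PAREN_FUNCTION_PARSERS above covers keyword-driven constructs such as
    # CASE, which carry no parenthesized argument list (illustrative):
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> case = sqlglot.parse_one("SELECT CASE WHEN x > 0 THEN 1 ELSE 0 END").find(exp.Case)
    #     >>> len(case.args["ifs"])
    #     1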
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1243 "GAP_FILL": lambda self: self._parse_gap_fill(), 1244 "JSON_OBJECT": lambda self: self._parse_json_object(), 1245 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1246 "JSON_TABLE": lambda self: self._parse_json_table(), 1247 "MATCH": lambda self: self._parse_match_against(), 1248 "NORMALIZE": lambda self: self._parse_normalize(), 1249 "OPENJSON": lambda self: self._parse_open_json(), 1250 "OVERLAY": lambda self: self._parse_overlay(), 1251 "POSITION": lambda self: self._parse_position(), 1252 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1253 "STRING_AGG": lambda self: self._parse_string_agg(), 1254 "SUBSTRING": lambda self: self._parse_substring(), 1255 "TRIM": lambda self: self._parse_trim(), 1256 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1257 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1258 "XMLELEMENT": lambda self: self.expression( 1259 exp.XMLElement, 1260 this=self._match_text_seq("NAME") and self._parse_id_var(), 1261 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1262 ), 1263 "XMLTABLE": lambda self: self._parse_xml_table(), 1264 } 1265 1266 QUERY_MODIFIER_PARSERS = { 1267 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1268 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1269 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1270 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1271 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1272 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1273 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1274 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1275 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1276 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1277 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1278 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1279 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1280 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1281 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1282 TokenType.CLUSTER_BY: lambda self: ( 1283 "cluster", 1284 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1285 ), 1286 TokenType.DISTRIBUTE_BY: lambda self: ( 1287 "distribute", 1288 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1289 ), 1290 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1291 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1292 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1293 } 1294 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1295 1296 SET_PARSERS = { 1297 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1298 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1299 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1300 "TRANSACTION": lambda self: self._parse_set_transaction(), 1301 } 1302 1303 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1304 1305 TYPE_LITERAL_PARSERS = { 1306 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1307 } 1308 1309 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
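
    # Dialect parsers customize behavior by overriding class-level flags like
    # the ones above and below. A hypothetical subclass, purely to illustrate
    # the pattern (the dialect named here is invented):
    #
    #     class MyDialectParser(Parser):
    #         LOG_DEFAULTS_TO_LN = True  # single-argument LOG() parses as LN()
    #         STRING_ALIASES = True      # allow SELECT COUNT(*) 'count'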
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
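
    # How the error knobs interact (a sketch; see check_errors and raise_error
    # below): RAISE collects up to max_errors messages before raising, while
    # the default IMMEDIATE raises on the first error.
    #
    #     from sqlglot.errors import ErrorLevel
    #     parser = Parser(error_level=ErrorLevel.RAISE, max_errors=5)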
1616 """ 1617 return self._parse( 1618 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1619 ) 1620 1621 def parse_into( 1622 self, 1623 expression_types: exp.IntoType, 1624 raw_tokens: t.List[Token], 1625 sql: t.Optional[str] = None, 1626 ) -> t.List[t.Optional[exp.Expression]]: 1627 """ 1628 Parses a list of tokens into a given Expression type. If a collection of Expression 1629 types is given instead, this method will try to parse the token list into each one 1630 of them, stopping at the first for which the parsing succeeds. 1631 1632 Args: 1633 expression_types: The expression type(s) to try and parse the token list into. 1634 raw_tokens: The list of tokens. 1635 sql: The original SQL string, used to produce helpful debug messages. 1636 1637 Returns: 1638 The target Expression. 1639 """ 1640 errors = [] 1641 for expression_type in ensure_list(expression_types): 1642 parser = self.EXPRESSION_PARSERS.get(expression_type) 1643 if not parser: 1644 raise TypeError(f"No parser registered for {expression_type}") 1645 1646 try: 1647 return self._parse(parser, raw_tokens, sql) 1648 except ParseError as e: 1649 e.errors[0]["into_expression"] = expression_type 1650 errors.append(e) 1651 1652 raise ParseError( 1653 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1654 errors=merge_errors(errors), 1655 ) from errors[-1] 1656 1657 def _parse( 1658 self, 1659 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1660 raw_tokens: t.List[Token], 1661 sql: t.Optional[str] = None, 1662 ) -> t.List[t.Optional[exp.Expression]]: 1663 self.reset() 1664 self.sql = sql or "" 1665 1666 total = len(raw_tokens) 1667 chunks: t.List[t.List[Token]] = [[]] 1668 1669 for i, token in enumerate(raw_tokens): 1670 if token.token_type == TokenType.SEMICOLON: 1671 if token.comments: 1672 chunks.append([token]) 1673 1674 if i < total - 1: 1675 chunks.append([]) 1676 else: 1677 chunks[-1].append(token) 1678 1679 expressions = [] 1680 1681 for tokens in chunks: 1682 self._index = -1 1683 self._tokens = tokens 1684 self._advance() 1685 1686 expressions.append(parse_method(self)) 1687 1688 if self._index < len(self._tokens): 1689 self.raise_error("Invalid expression / Unexpected token") 1690 1691 self.check_errors() 1692 1693 return expressions 1694 1695 def check_errors(self) -> None: 1696 """Logs or raises any found errors, depending on the chosen error level setting.""" 1697 if self.error_level == ErrorLevel.WARN: 1698 for error in self.errors: 1699 logger.error(str(error)) 1700 elif self.error_level == ErrorLevel.RAISE and self.errors: 1701 raise ParseError( 1702 concat_messages(self.errors, self.max_errors), 1703 errors=merge_errors(self.errors), 1704 ) 1705 1706 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1707 """ 1708 Appends an error in the list of recorded errors or raises it, depending on the chosen 1709 error level setting. 1710 """ 1711 token = token or self._curr or self._prev or Token.string("") 1712 start = token.start 1713 end = token.end + 1 1714 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1715 highlight = self.sql[start:end] 1716 end_context = self.sql[end : end + self.error_message_context] 1717 1718 error = ParseError.new( 1719 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)

    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)

    def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
        if expression and self._prev_comments:
            expression.add_comments(self._prev_comments)
            self._prev_comments = None

    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression

    def _find_sql(self, start: Token, end: Token) -> str:
        return self.sql[start.start : end.end + 1]

    def _is_connected(self) -> bool:
        return self._prev and self._curr and self._prev.end + 1 == self._curr.start

    def _advance(self, times: int = 1) -> None:
        self._index += times
        self._curr = seq_get(self._tokens, self._index)
        self._next = seq_get(self._tokens, self._index + 1)

        if self._index > 0:
            self._prev = self._tokens[self._index - 1]
            self._prev_comments = self._prev.comments
        else:
            self._prev = None
            self._prev_comments = None

    def _retreat(self, index: int) -> None:
        if index != self._index:
            self._advance(index - self._index)

    def _warn_unsupported(self) -> None:
        if len(self._tokens) <= 1:
            return

        # We use _find_sql because self.sql may comprise multiple chunks, and we're only
        # interested in emitting a warning for the one being currently processed.
        sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]

        logger.warning(
            f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
        )

    def _parse_command(self) -> exp.Command:
        self._warn_unsupported()
        return self.expression(
            exp.Command,
            comments=self._prev_comments,
            this=self._prev.text.upper(),
            expression=self._parse_string(),
        )

    def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
        """
        Attempts to backtrack if a parse function that contains a try/except internally raises an error.
        This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
        solve this by setting & resetting the parser state accordingly.
        """
        index = self._index
        error_level = self.error_level

        self.error_level = ErrorLevel.IMMEDIATE
        try:
            this = parse_method()
        except ParseError:
            this = None
        finally:
            if not this or retreat:
                self._retreat(index)
            self.error_level = error_level

        return this

    def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
        start = self._prev
        exists = self._parse_exists() if allow_exists else None

        self._match(TokenType.ON)

        materialized = self._match_text_seq("MATERIALIZED")
        kind = self._match_set(self.CREATABLES) and self._prev
        if not kind:
            return self._parse_as_command(start)

        if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
            this = self._parse_user_defined_function(kind=kind.token_type)
        elif kind.token_type == TokenType.TABLE:
            this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
        elif kind.token_type == TokenType.COLUMN:
            this = self._parse_column()
        else:
            this = self._parse_id_var()

        self._match(TokenType.IS)

        return self.expression(
            exp.Comment,
            this=this,
            kind=kind.text,
            expression=self._parse_string(),
            exists=exists,
            materialized=materialized,
        )

    def _parse_to_table(
        self,
    ) -> exp.ToTableProperty:
        table = self._parse_table_parts(schema=True)
        return self.expression(exp.ToTableProperty, this=table)

    # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
    def _parse_ttl(self) -> exp.Expression:
        def _parse_ttl_action() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match_text_seq("DELETE"):
                return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
            if self._match_text_seq("RECOMPRESS"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
                )
            if self._match_text_seq("TO", "DISK"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
                )
            if self._match_text_seq("TO", "VOLUME"):
                return self.expression(
                    exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
                )

            return this

        expressions = self._parse_csv(_parse_ttl_action)
        where = self._parse_where()
        group = self._parse_group()

        aggregates = None
        if group and self._match(TokenType.SET):
            aggregates = self._parse_csv(self._parse_set_item)

        return self.expression(
            exp.MergeTreeTTL,
            expressions=expressions,
            where=where,
            group=group,
            aggregates=aggregates,
        )

    def _parse_statement(self) -> t.Optional[exp.Expression]:
        if self._curr is None:
            return None

        if self._match_set(self.STATEMENT_PARSERS):
            comments = self._prev_comments
            stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
            stmt.add_comments(comments, prepend=True)
            return stmt

        if self._match_set(self.dialect.tokenizer_class.COMMANDS):
            return self._parse_command()

        expression = self._parse_expression()
        expression = self._parse_set_operations(expression) if expression else self._parse_select()
        return
self._parse_query_modifiers(expression) 1929 1930 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1931 start = self._prev 1932 temporary = self._match(TokenType.TEMPORARY) 1933 materialized = self._match_text_seq("MATERIALIZED") 1934 1935 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1936 if not kind: 1937 return self._parse_as_command(start) 1938 1939 concurrently = self._match_text_seq("CONCURRENTLY") 1940 if_exists = exists or self._parse_exists() 1941 1942 if kind == "COLUMN": 1943 this = self._parse_column() 1944 else: 1945 this = self._parse_table_parts( 1946 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1947 ) 1948 1949 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1950 1951 if self._match(TokenType.L_PAREN, advance=False): 1952 expressions = self._parse_wrapped_csv(self._parse_types) 1953 else: 1954 expressions = None 1955 1956 return self.expression( 1957 exp.Drop, 1958 exists=if_exists, 1959 this=this, 1960 expressions=expressions, 1961 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1962 temporary=temporary, 1963 materialized=materialized, 1964 cascade=self._match_text_seq("CASCADE"), 1965 constraints=self._match_text_seq("CONSTRAINTS"), 1966 purge=self._match_text_seq("PURGE"), 1967 cluster=cluster, 1968 concurrently=concurrently, 1969 ) 1970 1971 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1972 return ( 1973 self._match_text_seq("IF") 1974 and (not not_ or self._match(TokenType.NOT)) 1975 and self._match(TokenType.EXISTS) 1976 ) 1977 1978 def _parse_create(self) -> exp.Create | exp.Command: 1979 # Note: this can't be None because we've matched a statement parser 1980 start = self._prev 1981 1982 replace = ( 1983 start.token_type == TokenType.REPLACE 1984 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1985 or self._match_pair(TokenType.OR, TokenType.ALTER) 1986 ) 1987 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1988 1989 unique = self._match(TokenType.UNIQUE) 1990 1991 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1992 clustered = True 1993 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1994 "COLUMNSTORE" 1995 ): 1996 clustered = False 1997 else: 1998 clustered = None 1999 2000 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2001 self._advance() 2002 2003 properties = None 2004 create_token = self._match_set(self.CREATABLES) and self._prev 2005 2006 if not create_token: 2007 # exp.Properties.Location.POST_CREATE 2008 properties = self._parse_properties() 2009 create_token = self._match_set(self.CREATABLES) and self._prev 2010 2011 if not properties or not create_token: 2012 return self._parse_as_command(start) 2013 2014 concurrently = self._match_text_seq("CONCURRENTLY") 2015 exists = self._parse_exists(not_=True) 2016 this = None 2017 expression: t.Optional[exp.Expression] = None 2018 indexes = None 2019 no_schema_binding = None 2020 begin = None 2021 end = None 2022 clone = None 2023 2024 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2025 nonlocal properties 2026 if properties and temp_props: 2027 properties.expressions.extend(temp_props.expressions) 2028 elif temp_props: 2029 properties = temp_props 2030 2031 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2032 this = self._parse_user_defined_function(kind=create_token.token_type) 2033 2034 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
            extend_props(self._parse_properties())

            expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
            extend_props(self._parse_properties())

            if not expression:
                if self._match(TokenType.COMMAND):
                    expression = self._parse_as_command(self._prev)
                else:
                    begin = self._match(TokenType.BEGIN)
                    return_ = self._match_text_seq("RETURN")

                    if self._match(TokenType.STRING, advance=False):
                        # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
                        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
                        expression = self._parse_string()
                        extend_props(self._parse_properties())
                    else:
                        expression = self._parse_user_defined_function_expression()

                    end = self._match_text_seq("END")

                    if return_:
                        expression = self.expression(exp.Return, this=expression)
        elif create_token.token_type == TokenType.INDEX:
            # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
            if not self._match(TokenType.ON):
                index = self._parse_id_var()
                anonymous = False
            else:
                index = None
                anonymous = True

            this = self._parse_index(index=index, anonymous=anonymous)
        elif create_token.token_type in self.DB_CREATABLES:
            table_parts = self._parse_table_parts(
                schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
            )

            # exp.Properties.Location.POST_NAME
            self._match(TokenType.COMMA)
            extend_props(self._parse_properties(before=True))

            this = self._parse_schema(this=table_parts)

            # exp.Properties.Location.POST_SCHEMA and POST_WITH
            extend_props(self._parse_properties())

            has_alias = self._match(TokenType.ALIAS)
            if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
                # exp.Properties.Location.POST_ALIAS
                extend_props(self._parse_properties())

            if create_token.token_type == TokenType.SEQUENCE:
                expression = self._parse_types()
                props = self._parse_properties()
                if props:
                    sequence_props = exp.SequenceProperties()
                    options = []
                    for prop in props:
                        if isinstance(prop, exp.SequenceProperties):
                            for arg, value in prop.args.items():
                                if arg == "options":
                                    options.extend(value)
                                else:
                                    sequence_props.set(arg, value)
                            prop.pop()

                    if options:
                        sequence_props.set("options", options)

                    props.append("expressions", sequence_props)
                    extend_props(props)
            else:
                expression = self._parse_ddl_select()

            # Some dialects also support using a table as an alias instead of a SELECT.
            # Here we fall back to this as an alternative (see the sketch below).
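            # A minimal sketch of the common (non-fallback) path, assuming only
            # the public `sqlglot.parse_one` entry point; the SQL string is
            # illustrative:
            #
            #     >>> import sqlglot
            #     >>> from sqlglot import exp
            #     >>> ctas = sqlglot.parse_one("CREATE TABLE t2 AS SELECT a FROM t1")
            #     >>> isinstance(ctas, exp.Create)
            #     True
            #     >>> isinstance(ctas.expression, exp.Select)
            #     True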
2113 if not expression and has_alias: 2114 expression = self._try_parse(self._parse_table_parts) 2115 2116 if create_token.token_type == TokenType.TABLE: 2117 # exp.Properties.Location.POST_EXPRESSION 2118 extend_props(self._parse_properties()) 2119 2120 indexes = [] 2121 while True: 2122 index = self._parse_index() 2123 2124 # exp.Properties.Location.POST_INDEX 2125 extend_props(self._parse_properties()) 2126 if not index: 2127 break 2128 else: 2129 self._match(TokenType.COMMA) 2130 indexes.append(index) 2131 elif create_token.token_type == TokenType.VIEW: 2132 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2133 no_schema_binding = True 2134 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2135 extend_props(self._parse_properties()) 2136 2137 shallow = self._match_text_seq("SHALLOW") 2138 2139 if self._match_texts(self.CLONE_KEYWORDS): 2140 copy = self._prev.text.lower() == "copy" 2141 clone = self.expression( 2142 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2143 ) 2144 2145 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2146 return self._parse_as_command(start) 2147 2148 create_kind_text = create_token.text.upper() 2149 return self.expression( 2150 exp.Create, 2151 this=this, 2152 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2153 replace=replace, 2154 refresh=refresh, 2155 unique=unique, 2156 expression=expression, 2157 exists=exists, 2158 properties=properties, 2159 indexes=indexes, 2160 no_schema_binding=no_schema_binding, 2161 begin=begin, 2162 end=end, 2163 clone=clone, 2164 concurrently=concurrently, 2165 clustered=clustered, 2166 ) 2167 2168 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2169 seq = exp.SequenceProperties() 2170 2171 options = [] 2172 index = self._index 2173 2174 while self._curr: 2175 self._match(TokenType.COMMA) 2176 if self._match_text_seq("INCREMENT"): 2177 self._match_text_seq("BY") 2178 self._match_text_seq("=") 2179 seq.set("increment", self._parse_term()) 2180 elif self._match_text_seq("MINVALUE"): 2181 seq.set("minvalue", self._parse_term()) 2182 elif self._match_text_seq("MAXVALUE"): 2183 seq.set("maxvalue", self._parse_term()) 2184 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2185 self._match_text_seq("=") 2186 seq.set("start", self._parse_term()) 2187 elif self._match_text_seq("CACHE"): 2188 # T-SQL allows empty CACHE which is initialized dynamically 2189 seq.set("cache", self._parse_number() or True) 2190 elif self._match_text_seq("OWNED", "BY"): 2191 # "OWNED BY NONE" is the default 2192 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2193 else: 2194 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2195 if opt: 2196 options.append(opt) 2197 else: 2198 break 2199 2200 seq.set("options", options if options else None) 2201 return None if self._index == index else seq 2202 2203 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2204 # only used for teradata currently 2205 self._match(TokenType.COMMA) 2206 2207 kwargs = { 2208 "no": self._match_text_seq("NO"), 2209 "dual": self._match_text_seq("DUAL"), 2210 "before": self._match_text_seq("BEFORE"), 2211 "default": self._match_text_seq("DEFAULT"), 2212 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2213 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2214 "after": self._match_text_seq("AFTER"), 2215 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2216 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2217 } 2218 2219 if self._match_texts(self.PROPERTY_PARSERS): 2220 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2221 try: 2222 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2223 except TypeError: 2224 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2225 2226 return None 2227 2228 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2229 return self._parse_wrapped_csv(self._parse_property) 2230 2231 def _parse_property(self) -> t.Optional[exp.Expression]: 2232 if self._match_texts(self.PROPERTY_PARSERS): 2233 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2234 2235 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2236 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2237 2238 if self._match_text_seq("COMPOUND", "SORTKEY"): 2239 return self._parse_sortkey(compound=True) 2240 2241 if self._match_text_seq("SQL", "SECURITY"): 2242 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2243 2244 index = self._index 2245 2246 seq_props = self._parse_sequence_properties() 2247 if seq_props: 2248 return seq_props 2249 2250 self._retreat(index) 2251 key = self._parse_column() 2252 2253 if not self._match(TokenType.EQ): 2254 self._retreat(index) 2255 return None 2256 2257 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2258 if isinstance(key, exp.Column): 2259 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2260 2261 value = self._parse_bitwise() or self._parse_var(any_token=True) 2262 2263 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2264 if isinstance(value, exp.Column): 2265 value = exp.var(value.name) 2266 2267 return self.expression(exp.Property, this=key, value=value) 2268 2269 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2270 if self._match_text_seq("BY"): 2271 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2272 2273 self._match(TokenType.ALIAS) 2274 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2275 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2276 2277 return self.expression( 2278 exp.FileFormatProperty, 2279 this=( 2280 self.expression( 2281 exp.InputOutputFormat, 2282 input_format=input_format, 2283 output_format=output_format, 2284 ) 2285 if input_format or output_format 2286 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2287 ), 2288 hive_format=True, 2289 ) 2290 2291 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2292 field = self._parse_field() 2293 if isinstance(field, exp.Identifier) and not field.quoted: 2294 field = exp.var(field) 2295 2296 return field 2297 2298 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2299 self._match(TokenType.EQ) 2300 self._match(TokenType.ALIAS) 2301 2302 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2303 2304 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2305 properties = [] 2306 while True: 2307 if before: 2308 prop = self._parse_property_before() 2309 else: 2310 prop = self._parse_property() 2311 if not prop: 2312 break 2313 for p in ensure_list(prop): 2314 properties.append(p) 
2315 2316 if properties: 2317 return self.expression(exp.Properties, expressions=properties) 2318 2319 return None 2320 2321 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2322 return self.expression( 2323 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2324 ) 2325 2326 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2327 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2328 security_specifier = self._prev.text.upper() 2329 return self.expression(exp.SecurityProperty, this=security_specifier) 2330 return None 2331 2332 def _parse_settings_property(self) -> exp.SettingsProperty: 2333 return self.expression( 2334 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2335 ) 2336 2337 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2338 if self._index >= 2: 2339 pre_volatile_token = self._tokens[self._index - 2] 2340 else: 2341 pre_volatile_token = None 2342 2343 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2344 return exp.VolatileProperty() 2345 2346 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2347 2348 def _parse_retention_period(self) -> exp.Var: 2349 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2350 number = self._parse_number() 2351 number_str = f"{number} " if number else "" 2352 unit = self._parse_var(any_token=True) 2353 return exp.var(f"{number_str}{unit}") 2354 2355 def _parse_system_versioning_property( 2356 self, with_: bool = False 2357 ) -> exp.WithSystemVersioningProperty: 2358 self._match(TokenType.EQ) 2359 prop = self.expression( 2360 exp.WithSystemVersioningProperty, 2361 **{ # type: ignore 2362 "on": True, 2363 "with": with_, 2364 }, 2365 ) 2366 2367 if self._match_text_seq("OFF"): 2368 prop.set("on", False) 2369 return prop 2370 2371 self._match(TokenType.ON) 2372 if self._match(TokenType.L_PAREN): 2373 while self._curr and not self._match(TokenType.R_PAREN): 2374 if self._match_text_seq("HISTORY_TABLE", "="): 2375 prop.set("this", self._parse_table_parts()) 2376 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2377 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2378 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2379 prop.set("retention_period", self._parse_retention_period()) 2380 2381 self._match(TokenType.COMMA) 2382 2383 return prop 2384 2385 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2386 self._match(TokenType.EQ) 2387 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2388 prop = self.expression(exp.DataDeletionProperty, on=on) 2389 2390 if self._match(TokenType.L_PAREN): 2391 while self._curr and not self._match(TokenType.R_PAREN): 2392 if self._match_text_seq("FILTER_COLUMN", "="): 2393 prop.set("filter_column", self._parse_column()) 2394 elif self._match_text_seq("RETENTION_PERIOD", "="): 2395 prop.set("retention_period", self._parse_retention_period()) 2396 2397 self._match(TokenType.COMMA) 2398 2399 return prop 2400 2401 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2402 kind = "HASH" 2403 expressions: t.Optional[t.List[exp.Expression]] = None 2404 if self._match_text_seq("BY", "HASH"): 2405 expressions = self._parse_wrapped_csv(self._parse_id_var) 2406 elif self._match_text_seq("BY", "RANDOM"): 2407 kind = "RANDOM" 2408 2409 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2410 
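        # For reference, the clause shapes accepted by this method look like
        # the following (illustrative DDL, in the style of dialects such as
        # Doris that support DISTRIBUTED BY):
        #
        #     DISTRIBUTED BY HASH (k1, k2) BUCKETS 10
        #     DISTRIBUTED BY RANDOM BUCKETS AUTO
        #     DISTRIBUTED BY HASH (k1)  -- bucket count left as AUTO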
buckets: t.Optional[exp.Expression] = None 2411 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2412 buckets = self._parse_number() 2413 2414 return self.expression( 2415 exp.DistributedByProperty, 2416 expressions=expressions, 2417 kind=kind, 2418 buckets=buckets, 2419 order=self._parse_order(), 2420 ) 2421 2422 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2423 self._match_text_seq("KEY") 2424 expressions = self._parse_wrapped_id_vars() 2425 return self.expression(expr_type, expressions=expressions) 2426 2427 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2428 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2429 prop = self._parse_system_versioning_property(with_=True) 2430 self._match_r_paren() 2431 return prop 2432 2433 if self._match(TokenType.L_PAREN, advance=False): 2434 return self._parse_wrapped_properties() 2435 2436 if self._match_text_seq("JOURNAL"): 2437 return self._parse_withjournaltable() 2438 2439 if self._match_texts(self.VIEW_ATTRIBUTES): 2440 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2441 2442 if self._match_text_seq("DATA"): 2443 return self._parse_withdata(no=False) 2444 elif self._match_text_seq("NO", "DATA"): 2445 return self._parse_withdata(no=True) 2446 2447 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2448 return self._parse_serde_properties(with_=True) 2449 2450 if self._match(TokenType.SCHEMA): 2451 return self.expression( 2452 exp.WithSchemaBindingProperty, 2453 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2454 ) 2455 2456 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2457 return self.expression( 2458 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2459 ) 2460 2461 if not self._next: 2462 return None 2463 2464 return self._parse_withisolatedloading() 2465 2466 def _parse_procedure_option(self) -> exp.Expression | None: 2467 if self._match_text_seq("EXECUTE", "AS"): 2468 return self.expression( 2469 exp.ExecuteAsProperty, 2470 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2471 or self._parse_string(), 2472 ) 2473 2474 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2475 2476 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2477 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2478 self._match(TokenType.EQ) 2479 2480 user = self._parse_id_var() 2481 self._match(TokenType.PARAMETER) 2482 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2483 2484 if not user or not host: 2485 return None 2486 2487 return exp.DefinerProperty(this=f"{user}@{host}") 2488 2489 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2490 self._match(TokenType.TABLE) 2491 self._match(TokenType.EQ) 2492 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2493 2494 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2495 return self.expression(exp.LogProperty, no=no) 2496 2497 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2498 return self.expression(exp.JournalProperty, **kwargs) 2499 2500 def _parse_checksum(self) -> exp.ChecksumProperty: 2501 self._match(TokenType.EQ) 2502 2503 on = None 2504 if self._match(TokenType.ON): 2505 on = True 2506 elif self._match_text_seq("OFF"): 2507 on = False 2508 2509 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2510 2511 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2512 return self.expression( 2513 exp.Cluster, 2514 expressions=( 2515 self._parse_wrapped_csv(self._parse_ordered) 2516 if wrapped 2517 else self._parse_csv(self._parse_ordered) 2518 ), 2519 ) 2520 2521 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2522 self._match_text_seq("BY") 2523 2524 self._match_l_paren() 2525 expressions = self._parse_csv(self._parse_column) 2526 self._match_r_paren() 2527 2528 if self._match_text_seq("SORTED", "BY"): 2529 self._match_l_paren() 2530 sorted_by = self._parse_csv(self._parse_ordered) 2531 self._match_r_paren() 2532 else: 2533 sorted_by = None 2534 2535 self._match(TokenType.INTO) 2536 buckets = self._parse_number() 2537 self._match_text_seq("BUCKETS") 2538 2539 return self.expression( 2540 exp.ClusteredByProperty, 2541 expressions=expressions, 2542 sorted_by=sorted_by, 2543 buckets=buckets, 2544 ) 2545 2546 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2547 if not self._match_text_seq("GRANTS"): 2548 self._retreat(self._index - 1) 2549 return None 2550 2551 return self.expression(exp.CopyGrantsProperty) 2552 2553 def _parse_freespace(self) -> exp.FreespaceProperty: 2554 self._match(TokenType.EQ) 2555 return self.expression( 2556 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2557 ) 2558 2559 def _parse_mergeblockratio( 2560 self, no: bool = False, default: bool = False 2561 ) -> exp.MergeBlockRatioProperty: 2562 if self._match(TokenType.EQ): 2563 return self.expression( 2564 exp.MergeBlockRatioProperty, 2565 this=self._parse_number(), 2566 percent=self._match(TokenType.PERCENT), 2567 ) 2568 2569 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2570 2571 def _parse_datablocksize( 2572 self, 2573 default: t.Optional[bool] = None, 2574 minimum: t.Optional[bool] = None, 2575 maximum: t.Optional[bool] = None, 2576 ) -> exp.DataBlocksizeProperty: 2577 self._match(TokenType.EQ) 2578 size = self._parse_number() 2579 2580 units = None 2581 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2582 units = self._prev.text 2583 2584 return self.expression( 2585 exp.DataBlocksizeProperty, 2586 size=size, 2587 units=units, 2588 default=default, 2589 minimum=minimum, 2590 maximum=maximum, 2591 ) 2592 2593 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2594 self._match(TokenType.EQ) 2595 always = self._match_text_seq("ALWAYS") 2596 manual = self._match_text_seq("MANUAL") 2597 never = self._match_text_seq("NEVER") 2598 default = self._match_text_seq("DEFAULT") 2599 2600 autotemp = None 2601 if self._match_text_seq("AUTOTEMP"): 2602 autotemp = self._parse_schema() 2603 2604 return self.expression( 2605 exp.BlockCompressionProperty, 2606 always=always, 2607 manual=manual, 2608 never=never, 2609 default=default, 2610 autotemp=autotemp, 2611 ) 2612 2613 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2614 index = self._index 2615 no = self._match_text_seq("NO") 2616 concurrent = self._match_text_seq("CONCURRENT") 2617 2618 if not self._match_text_seq("ISOLATED", "LOADING"): 2619 self._retreat(index) 2620 return None 2621 2622 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2623 return self.expression( 2624 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2625 ) 2626 2627 def _parse_locking(self) -> exp.LockingProperty: 2628 if self._match(TokenType.TABLE): 2629 kind = "TABLE" 2630 elif 
self._match(TokenType.VIEW): 2631 kind = "VIEW" 2632 elif self._match(TokenType.ROW): 2633 kind = "ROW" 2634 elif self._match_text_seq("DATABASE"): 2635 kind = "DATABASE" 2636 else: 2637 kind = None 2638 2639 if kind in ("DATABASE", "TABLE", "VIEW"): 2640 this = self._parse_table_parts() 2641 else: 2642 this = None 2643 2644 if self._match(TokenType.FOR): 2645 for_or_in = "FOR" 2646 elif self._match(TokenType.IN): 2647 for_or_in = "IN" 2648 else: 2649 for_or_in = None 2650 2651 if self._match_text_seq("ACCESS"): 2652 lock_type = "ACCESS" 2653 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2654 lock_type = "EXCLUSIVE" 2655 elif self._match_text_seq("SHARE"): 2656 lock_type = "SHARE" 2657 elif self._match_text_seq("READ"): 2658 lock_type = "READ" 2659 elif self._match_text_seq("WRITE"): 2660 lock_type = "WRITE" 2661 elif self._match_text_seq("CHECKSUM"): 2662 lock_type = "CHECKSUM" 2663 else: 2664 lock_type = None 2665 2666 override = self._match_text_seq("OVERRIDE") 2667 2668 return self.expression( 2669 exp.LockingProperty, 2670 this=this, 2671 kind=kind, 2672 for_or_in=for_or_in, 2673 lock_type=lock_type, 2674 override=override, 2675 ) 2676 2677 def _parse_partition_by(self) -> t.List[exp.Expression]: 2678 if self._match(TokenType.PARTITION_BY): 2679 return self._parse_csv(self._parse_assignment) 2680 return [] 2681 2682 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2683 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2684 if self._match_text_seq("MINVALUE"): 2685 return exp.var("MINVALUE") 2686 if self._match_text_seq("MAXVALUE"): 2687 return exp.var("MAXVALUE") 2688 return self._parse_bitwise() 2689 2690 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2691 expression = None 2692 from_expressions = None 2693 to_expressions = None 2694 2695 if self._match(TokenType.IN): 2696 this = self._parse_wrapped_csv(self._parse_bitwise) 2697 elif self._match(TokenType.FROM): 2698 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2699 self._match_text_seq("TO") 2700 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2701 elif self._match_text_seq("WITH", "(", "MODULUS"): 2702 this = self._parse_number() 2703 self._match_text_seq(",", "REMAINDER") 2704 expression = self._parse_number() 2705 self._match_r_paren() 2706 else: 2707 self.raise_error("Failed to parse partition bound spec.") 2708 2709 return self.expression( 2710 exp.PartitionBoundSpec, 2711 this=this, 2712 expression=expression, 2713 from_expressions=from_expressions, 2714 to_expressions=to_expressions, 2715 ) 2716 2717 # https://www.postgresql.org/docs/current/sql-createtable.html 2718 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2719 if not self._match_text_seq("OF"): 2720 self._retreat(self._index - 1) 2721 return None 2722 2723 this = self._parse_table(schema=True) 2724 2725 if self._match(TokenType.DEFAULT): 2726 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2727 elif self._match_text_seq("FOR", "VALUES"): 2728 expression = self._parse_partition_bound_spec() 2729 else: 2730 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2731 2732 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2733 2734 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2735 self._match(TokenType.EQ) 2736 return self.expression( 2737 exp.PartitionedByProperty, 2738 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2739 ) 2740 2741 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2742 if self._match_text_seq("AND", "STATISTICS"): 2743 statistics = True 2744 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2745 statistics = False 2746 else: 2747 statistics = None 2748 2749 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2750 2751 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2752 if self._match_text_seq("SQL"): 2753 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2754 return None 2755 2756 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2757 if self._match_text_seq("SQL", "DATA"): 2758 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2759 return None 2760 2761 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2762 if self._match_text_seq("PRIMARY", "INDEX"): 2763 return exp.NoPrimaryIndexProperty() 2764 if self._match_text_seq("SQL"): 2765 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2766 return None 2767 2768 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2769 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2770 return exp.OnCommitProperty() 2771 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2772 return exp.OnCommitProperty(delete=True) 2773 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2774 2775 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2776 if self._match_text_seq("SQL", "DATA"): 2777 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2778 return None 2779 2780 def _parse_distkey(self) -> exp.DistKeyProperty: 2781 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2782 2783 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2784 table = self._parse_table(schema=True) 2785 2786 options = [] 2787 while self._match_texts(("INCLUDING", "EXCLUDING")): 2788 this = self._prev.text.upper() 2789 2790 id_var = self._parse_id_var() 2791 if not id_var: 2792 return None 2793 2794 options.append( 2795 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2796 ) 2797 2798 return self.expression(exp.LikeProperty, this=table, expressions=options) 2799 2800 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2801 return self.expression( 2802 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2803 ) 2804 2805 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2806 self._match(TokenType.EQ) 2807 return self.expression( 2808 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2809 ) 2810 2811 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2812 self._match_text_seq("WITH", "CONNECTION") 2813 return self.expression( 2814 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2815 ) 2816 2817 def _parse_returns(self) -> exp.ReturnsProperty: 2818 value: t.Optional[exp.Expression] 2819 null = None 2820 is_table = self._match(TokenType.TABLE) 2821 2822 if is_table: 2823 if self._match(TokenType.LT): 2824 value = self.expression( 2825 exp.Schema, 2826 this="TABLE", 2827 expressions=self._parse_csv(self._parse_struct_types), 2828 ) 2829 if not self._match(TokenType.GT): 2830 self.raise_error("Expecting >") 2831 else: 2832 value = self._parse_schema(exp.var("TABLE")) 2833 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2834 null = True 2835 value = None 2836 else: 2837 value = self._parse_types() 2838 2839 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2840 2841 def _parse_describe(self) -> exp.Describe: 2842 kind = self._match_set(self.CREATABLES) and self._prev.text 2843 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2844 if self._match(TokenType.DOT): 2845 style = None 2846 self._retreat(self._index - 2) 2847 2848 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2849 2850 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2851 this = self._parse_statement() 2852 else: 2853 this = self._parse_table(schema=True) 2854 2855 properties = self._parse_properties() 2856 expressions = properties.expressions if properties else None 2857 partition = self._parse_partition() 2858 return self.expression( 2859 exp.Describe, 2860 this=this, 2861 style=style, 2862 kind=kind, 2863 expressions=expressions, 2864 partition=partition, 2865 format=format, 2866 ) 2867 2868 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2869 kind = self._prev.text.upper() 2870 expressions = [] 2871 2872 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2873 if self._match(TokenType.WHEN): 2874 expression = self._parse_disjunction() 2875 self._match(TokenType.THEN) 2876 else: 2877 expression = None 2878 2879 else_ = self._match(TokenType.ELSE) 2880 2881 if not self._match(TokenType.INTO): 2882 return None 2883 2884 return self.expression( 2885 exp.ConditionalInsert, 2886 this=self.expression( 2887 exp.Insert, 2888 this=self._parse_table(schema=True), 2889 expression=self._parse_derived_table_values(), 2890 ), 2891 expression=expression, 2892 else_=else_, 2893 ) 2894 2895 expression = parse_conditional_insert() 2896 while expression is not None: 2897 expressions.append(expression) 2898 expression = parse_conditional_insert() 2899 2900 return self.expression( 2901 exp.MultitableInserts, 2902 kind=kind, 2903 comments=comments, 2904 expressions=expressions, 2905 source=self._parse_table(), 2906 ) 2907 2908 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2909 comments = [] 2910 hint = self._parse_hint() 2911 overwrite = self._match(TokenType.OVERWRITE) 2912 ignore = self._match(TokenType.IGNORE) 2913 local = self._match_text_seq("LOCAL") 2914 alternative = None 2915 is_function = None 2916 2917 if self._match_text_seq("DIRECTORY"): 2918 this: t.Optional[exp.Expression] = self.expression( 2919 exp.Directory, 2920 this=self._parse_var_or_string(), 2921 local=local, 2922 row_format=self._parse_row_format(match_row=True), 2923 ) 2924 else: 2925 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2926 comments += ensure_list(self._prev_comments) 2927 return self._parse_multitable_inserts(comments) 2928 2929 if self._match(TokenType.OR): 2930 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2931 2932 self._match(TokenType.INTO) 2933 comments += ensure_list(self._prev_comments) 2934 self._match(TokenType.TABLE) 2935 is_function = self._match(TokenType.FUNCTION) 2936 2937 this = ( 2938 self._parse_table(schema=True, parse_partition=True) 2939 if not is_function 2940 else self._parse_function() 2941 ) 2942 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2943 this.set("alias", self._parse_table_alias()) 2944 2945 returning = self._parse_returning() 2946 2947 return self.expression( 2948 
exp.Insert, 2949 comments=comments, 2950 hint=hint, 2951 is_function=is_function, 2952 this=this, 2953 stored=self._match_text_seq("STORED") and self._parse_stored(), 2954 by_name=self._match_text_seq("BY", "NAME"), 2955 exists=self._parse_exists(), 2956 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2957 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2958 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2959 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2960 conflict=self._parse_on_conflict(), 2961 returning=returning or self._parse_returning(), 2962 overwrite=overwrite, 2963 alternative=alternative, 2964 ignore=ignore, 2965 source=self._match(TokenType.TABLE) and self._parse_table(), 2966 ) 2967 2968 def _parse_kill(self) -> exp.Kill: 2969 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2970 2971 return self.expression( 2972 exp.Kill, 2973 this=self._parse_primary(), 2974 kind=kind, 2975 ) 2976 2977 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2978 conflict = self._match_text_seq("ON", "CONFLICT") 2979 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2980 2981 if not conflict and not duplicate: 2982 return None 2983 2984 conflict_keys = None 2985 constraint = None 2986 2987 if conflict: 2988 if self._match_text_seq("ON", "CONSTRAINT"): 2989 constraint = self._parse_id_var() 2990 elif self._match(TokenType.L_PAREN): 2991 conflict_keys = self._parse_csv(self._parse_id_var) 2992 self._match_r_paren() 2993 2994 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2995 if self._prev.token_type == TokenType.UPDATE: 2996 self._match(TokenType.SET) 2997 expressions = self._parse_csv(self._parse_equality) 2998 else: 2999 expressions = None 3000 3001 return self.expression( 3002 exp.OnConflict, 3003 duplicate=duplicate, 3004 expressions=expressions, 3005 action=action, 3006 conflict_keys=conflict_keys, 3007 constraint=constraint, 3008 where=self._parse_where(), 3009 ) 3010 3011 def _parse_returning(self) -> t.Optional[exp.Returning]: 3012 if not self._match(TokenType.RETURNING): 3013 return None 3014 return self.expression( 3015 exp.Returning, 3016 expressions=self._parse_csv(self._parse_expression), 3017 into=self._match(TokenType.INTO) and self._parse_table_part(), 3018 ) 3019 3020 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3021 if not self._match(TokenType.FORMAT): 3022 return None 3023 return self._parse_row_format() 3024 3025 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3026 index = self._index 3027 with_ = with_ or self._match_text_seq("WITH") 3028 3029 if not self._match(TokenType.SERDE_PROPERTIES): 3030 self._retreat(index) 3031 return None 3032 return self.expression( 3033 exp.SerdeProperties, 3034 **{ # type: ignore 3035 "expressions": self._parse_wrapped_properties(), 3036 "with": with_, 3037 }, 3038 ) 3039 3040 def _parse_row_format( 3041 self, match_row: bool = False 3042 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3043 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3044 return None 3045 3046 if self._match_text_seq("SERDE"): 3047 this = self._parse_string() 3048 3049 serde_properties = self._parse_serde_properties() 3050 3051 return self.expression( 3052 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3053 ) 3054 3055 self._match_text_seq("DELIMITED") 3056 3057 kwargs = {} 3058 3059 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3060 kwargs["fields"] = self._parse_string() 3061 if self._match_text_seq("ESCAPED", "BY"): 3062 kwargs["escaped"] = self._parse_string() 3063 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3064 kwargs["collection_items"] = self._parse_string() 3065 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3066 kwargs["map_keys"] = self._parse_string() 3067 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3068 kwargs["lines"] = self._parse_string() 3069 if self._match_text_seq("NULL", "DEFINED", "AS"): 3070 kwargs["null"] = self._parse_string() 3071 3072 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3073 3074 def _parse_load(self) -> exp.LoadData | exp.Command: 3075 if self._match_text_seq("DATA"): 3076 local = self._match_text_seq("LOCAL") 3077 self._match_text_seq("INPATH") 3078 inpath = self._parse_string() 3079 overwrite = self._match(TokenType.OVERWRITE) 3080 self._match_pair(TokenType.INTO, TokenType.TABLE) 3081 3082 return self.expression( 3083 exp.LoadData, 3084 this=self._parse_table(schema=True), 3085 local=local, 3086 overwrite=overwrite, 3087 inpath=inpath, 3088 partition=self._parse_partition(), 3089 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3090 serde=self._match_text_seq("SERDE") and self._parse_string(), 3091 ) 3092 return self._parse_as_command(self._prev) 3093 3094 def _parse_delete(self) -> exp.Delete: 3095 # This handles MySQL's "Multiple-Table Syntax" 3096 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3097 tables = None 3098 if not self._match(TokenType.FROM, advance=False): 3099 tables = self._parse_csv(self._parse_table) or None 3100 3101 returning = self._parse_returning() 3102 3103 return self.expression( 3104 exp.Delete, 3105 tables=tables, 3106 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3107 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3108 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3109 where=self._parse_where(), 3110 returning=returning or self._parse_returning(), 3111 limit=self._parse_limit(), 3112 ) 3113 3114 def _parse_update(self) -> exp.Update: 3115 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3116 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3117 returning = self._parse_returning() 3118 return self.expression( 3119 exp.Update, 3120 **{ # type: ignore 3121 "this": this, 3122 "expressions": expressions, 3123 "from": self._parse_from(joins=True), 3124 "where": self._parse_where(), 3125 "returning": returning or self._parse_returning(), 3126 "order": self._parse_order(), 3127 "limit": self._parse_limit(), 3128 }, 3129 ) 3130 3131 def _parse_use(self) -> exp.Use: 3132 return self.expression( 3133 exp.Use, 3134 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3135 this=self._parse_table(schema=False), 3136 ) 3137 3138 def _parse_uncache(self) -> exp.Uncache: 3139 if not self._match(TokenType.TABLE): 3140 self.raise_error("Expecting TABLE after UNCACHE") 3141 3142 return self.expression( 3143 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3144 ) 3145 3146 def _parse_cache(self) -> exp.Cache: 3147 lazy = self._match_text_seq("LAZY") 3148 self._match(TokenType.TABLE) 3149 table = 
self._parse_table(schema=True) 3150 3151 options = [] 3152 if self._match_text_seq("OPTIONS"): 3153 self._match_l_paren() 3154 k = self._parse_string() 3155 self._match(TokenType.EQ) 3156 v = self._parse_string() 3157 options = [k, v] 3158 self._match_r_paren() 3159 3160 self._match(TokenType.ALIAS) 3161 return self.expression( 3162 exp.Cache, 3163 this=table, 3164 lazy=lazy, 3165 options=options, 3166 expression=self._parse_select(nested=True), 3167 ) 3168 3169 def _parse_partition(self) -> t.Optional[exp.Partition]: 3170 if not self._match_texts(self.PARTITION_KEYWORDS): 3171 return None 3172 3173 return self.expression( 3174 exp.Partition, 3175 subpartition=self._prev.text.upper() == "SUBPARTITION", 3176 expressions=self._parse_wrapped_csv(self._parse_assignment), 3177 ) 3178 3179 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3180 def _parse_value_expression() -> t.Optional[exp.Expression]: 3181 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3182 return exp.var(self._prev.text.upper()) 3183 return self._parse_expression() 3184 3185 if self._match(TokenType.L_PAREN): 3186 expressions = self._parse_csv(_parse_value_expression) 3187 self._match_r_paren() 3188 return self.expression(exp.Tuple, expressions=expressions) 3189 3190 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3191 expression = self._parse_expression() 3192 if expression: 3193 return self.expression(exp.Tuple, expressions=[expression]) 3194 return None 3195 3196 def _parse_projections(self) -> t.List[exp.Expression]: 3197 return self._parse_expressions() 3198 3199 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3200 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3201 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3202 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3203 ) 3204 elif self._match(TokenType.FROM): 3205 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3206 # Support parentheses for duckdb FROM-first syntax 3207 select = self._parse_select() 3208 if select: 3209 select.set("from", from_) 3210 this = select 3211 else: 3212 this = exp.select("*").from_(t.cast(exp.From, from_)) 3213 else: 3214 this = ( 3215 self._parse_table(consume_pipe=True) 3216 if table 3217 else self._parse_select(nested=True, parse_set_operation=False) 3218 ) 3219 3220 # Transform exp.Values into a exp.Table to pass through parse_query_modifiers 3221 # in case a modifier (e.g. 
join) is following 3222 if table and isinstance(this, exp.Values) and this.alias: 3223 alias = this.args["alias"].pop() 3224 this = exp.Table(this=this, alias=alias) 3225 3226 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3227 3228 return this 3229 3230 def _parse_select( 3231 self, 3232 nested: bool = False, 3233 table: bool = False, 3234 parse_subquery_alias: bool = True, 3235 parse_set_operation: bool = True, 3236 consume_pipe: bool = True, 3237 ) -> t.Optional[exp.Expression]: 3238 query = self._parse_select_query( 3239 nested=nested, 3240 table=table, 3241 parse_subquery_alias=parse_subquery_alias, 3242 parse_set_operation=parse_set_operation, 3243 ) 3244 3245 if ( 3246 consume_pipe 3247 and self._match(TokenType.PIPE_GT, advance=False) 3248 and isinstance(query, exp.Query) 3249 ): 3250 query = self._parse_pipe_syntax_query(query) 3251 query = query.subquery(copy=False) if query and table else query 3252 3253 return query 3254 3255 def _parse_select_query( 3256 self, 3257 nested: bool = False, 3258 table: bool = False, 3259 parse_subquery_alias: bool = True, 3260 parse_set_operation: bool = True, 3261 ) -> t.Optional[exp.Expression]: 3262 cte = self._parse_with() 3263 3264 if cte: 3265 this = self._parse_statement() 3266 3267 if not this: 3268 self.raise_error("Failed to parse any statement following CTE") 3269 return cte 3270 3271 if "with" in this.arg_types: 3272 this.set("with", cte) 3273 else: 3274 self.raise_error(f"{this.key} does not support CTE") 3275 this = cte 3276 3277 return this 3278 3279 # duckdb supports leading with FROM x 3280 from_ = ( 3281 self._parse_from(consume_pipe=True) 3282 if self._match(TokenType.FROM, advance=False) 3283 else None 3284 ) 3285 3286 if self._match(TokenType.SELECT): 3287 comments = self._prev_comments 3288 3289 hint = self._parse_hint() 3290 3291 if self._next and not self._next.token_type == TokenType.DOT: 3292 all_ = self._match(TokenType.ALL) 3293 distinct = self._match_set(self.DISTINCT_TOKENS) 3294 else: 3295 all_, distinct = None, None 3296 3297 kind = ( 3298 self._match(TokenType.ALIAS) 3299 and self._match_texts(("STRUCT", "VALUE")) 3300 and self._prev.text.upper() 3301 ) 3302 3303 if distinct: 3304 distinct = self.expression( 3305 exp.Distinct, 3306 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3307 ) 3308 3309 if all_ and distinct: 3310 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3311 3312 operation_modifiers = [] 3313 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3314 operation_modifiers.append(exp.var(self._prev.text.upper())) 3315 3316 limit = self._parse_limit(top=True) 3317 projections = self._parse_projections() 3318 3319 this = self.expression( 3320 exp.Select, 3321 kind=kind, 3322 hint=hint, 3323 distinct=distinct, 3324 expressions=projections, 3325 limit=limit, 3326 operation_modifiers=operation_modifiers or None, 3327 ) 3328 this.comments = comments 3329 3330 into = self._parse_into() 3331 if into: 3332 this.set("into", into) 3333 3334 if not from_: 3335 from_ = self._parse_from() 3336 3337 if from_: 3338 this.set("from", from_) 3339 3340 this = self._parse_query_modifiers(this) 3341 elif (table or nested) and self._match(TokenType.L_PAREN): 3342 this = self._parse_wrapped_select(table=table) 3343 3344 # We return early here so that the UNION isn't attached to the subquery by the 3345 # following call to _parse_set_operations, but instead becomes the parent node 3346 self._match_r_paren() 3347 return self._parse_subquery(this, 
parse_alias=parse_subquery_alias) 3348 elif self._match(TokenType.VALUES, advance=False): 3349 this = self._parse_derived_table_values() 3350 elif from_: 3351 this = exp.select("*").from_(from_.this, copy=False) 3352 elif self._match(TokenType.SUMMARIZE): 3353 table = self._match(TokenType.TABLE) 3354 this = self._parse_select() or self._parse_string() or self._parse_table() 3355 return self.expression(exp.Summarize, this=this, table=table) 3356 elif self._match(TokenType.DESCRIBE): 3357 this = self._parse_describe() 3358 elif self._match_text_seq("STREAM"): 3359 this = self._parse_function() 3360 if this: 3361 this = self.expression(exp.Stream, this=this) 3362 else: 3363 self._retreat(self._index - 1) 3364 else: 3365 this = None 3366 3367 return self._parse_set_operations(this) if parse_set_operation else this 3368 3369 def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]: 3370 self._match_text_seq("SEARCH") 3371 3372 kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper() 3373 3374 if not kind: 3375 return None 3376 3377 self._match_text_seq("FIRST", "BY") 3378 3379 return self.expression( 3380 exp.RecursiveWithSearch, 3381 kind=kind, 3382 this=self._parse_id_var(), 3383 expression=self._match_text_seq("SET") and self._parse_id_var(), 3384 using=self._match_text_seq("USING") and self._parse_id_var(), 3385 ) 3386 3387 def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]: 3388 if not skip_with_token and not self._match(TokenType.WITH): 3389 return None 3390 3391 comments = self._prev_comments 3392 recursive = self._match(TokenType.RECURSIVE) 3393 3394 last_comments = None 3395 expressions = [] 3396 while True: 3397 cte = self._parse_cte() 3398 if isinstance(cte, exp.CTE): 3399 expressions.append(cte) 3400 if last_comments: 3401 cte.add_comments(last_comments) 3402 3403 if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH): 3404 break 3405 else: 3406 self._match(TokenType.WITH) 3407 3408 last_comments = self._prev_comments 3409 3410 return self.expression( 3411 exp.With, 3412 comments=comments, 3413 expressions=expressions, 3414 recursive=recursive, 3415 search=self._parse_recursive_with_search(), 3416 ) 3417 3418 def _parse_cte(self) -> t.Optional[exp.CTE]: 3419 index = self._index 3420 3421 alias = self._parse_table_alias(self.ID_VAR_TOKENS) 3422 if not alias or not alias.this: 3423 self.raise_error("Expected CTE to have alias") 3424 3425 if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE: 3426 self._retreat(index) 3427 return None 3428 3429 comments = self._prev_comments 3430 3431 if self._match_text_seq("NOT", "MATERIALIZED"): 3432 materialized = False 3433 elif self._match_text_seq("MATERIALIZED"): 3434 materialized = True 3435 else: 3436 materialized = None 3437 3438 cte = self.expression( 3439 exp.CTE, 3440 this=self._parse_wrapped(self._parse_statement), 3441 alias=alias, 3442 materialized=materialized, 3443 comments=comments, 3444 ) 3445 3446 values = cte.this 3447 if isinstance(values, exp.Values): 3448 if values.alias: 3449 cte.set("this", exp.select("*").from_(values)) 3450 else: 3451 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True))) 3452 3453 return cte 3454 3455 def _parse_table_alias( 3456 self, alias_tokens: t.Optional[t.Collection[TokenType]] = None 3457 ) -> t.Optional[exp.TableAlias]: 3458 # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses) 3459 # so this section tries to parse the clause 
version and if it fails, it treats the token 3460 # as an identifier (alias) 3461 if self._can_parse_limit_or_offset(): 3462 return None 3463 3464 any_token = self._match(TokenType.ALIAS) 3465 alias = ( 3466 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3467 or self._parse_string_as_identifier() 3468 ) 3469 3470 index = self._index 3471 if self._match(TokenType.L_PAREN): 3472 columns = self._parse_csv(self._parse_function_parameter) 3473 self._match_r_paren() if columns else self._retreat(index) 3474 else: 3475 columns = None 3476 3477 if not alias and not columns: 3478 return None 3479 3480 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3481 3482 # We bubble up comments from the Identifier to the TableAlias 3483 if isinstance(alias, exp.Identifier): 3484 table_alias.add_comments(alias.pop_comments()) 3485 3486 return table_alias 3487 3488 def _parse_subquery( 3489 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3490 ) -> t.Optional[exp.Subquery]: 3491 if not this: 3492 return None 3493 3494 return self.expression( 3495 exp.Subquery, 3496 this=this, 3497 pivots=self._parse_pivots(), 3498 alias=self._parse_table_alias() if parse_alias else None, 3499 sample=self._parse_table_sample(), 3500 ) 3501 3502 def _implicit_unnests_to_explicit(self, this: E) -> E: 3503 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3504 3505 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3506 for i, join in enumerate(this.args.get("joins") or []): 3507 table = join.this 3508 normalized_table = table.copy() 3509 normalized_table.meta["maybe_column"] = True 3510 normalized_table = _norm(normalized_table, dialect=self.dialect) 3511 3512 if isinstance(table, exp.Table) and not join.args.get("on"): 3513 if normalized_table.parts[0].name in refs: 3514 table_as_column = table.to_column() 3515 unnest = exp.Unnest(expressions=[table_as_column]) 3516 3517 # Table.to_column creates a parent Alias node that we want to convert to 3518 # a TableAlias and attach to the Unnest, so it matches the parser's output 3519 if isinstance(table.args.get("alias"), exp.TableAlias): 3520 table_as_column.replace(table_as_column.this) 3521 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3522 3523 table.replace(unnest) 3524 3525 refs.add(normalized_table.alias_or_name) 3526 3527 return this 3528 3529 def _parse_query_modifiers( 3530 self, this: t.Optional[exp.Expression] 3531 ) -> t.Optional[exp.Expression]: 3532 if isinstance(this, self.MODIFIABLES): 3533 for join in self._parse_joins(): 3534 this.append("joins", join) 3535 for lateral in iter(self._parse_lateral, None): 3536 this.append("laterals", lateral) 3537 3538 while True: 3539 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3540 modifier_token = self._curr 3541 parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type] 3542 key, expression = parser(self) 3543 3544 if expression: 3545 if this.args.get(key): 3546 self.raise_error( 3547 f"Found multiple '{modifier_token.text.upper()}' clauses", 3548 token=modifier_token, 3549 ) 3550 3551 this.set(key, expression) 3552 if key == "limit": 3553 offset = expression.args.pop("offset", None) 3554 3555 if offset: 3556 offset = exp.Offset(expression=offset) 3557 this.set("offset", offset) 3558 3559 limit_by_expressions = expression.expressions 3560 expression.set("expressions", None) 3561 offset.set("expressions", limit_by_expressions) 3562 continue 
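# No further modifier token matched here (or its parser produced nothing), so stop scanning for query modifiers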
3563 break 3564 3565 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3566 this = self._implicit_unnests_to_explicit(this) 3567 3568 return this 3569 3570 def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3571 start = self._curr 3572 while self._curr: 3573 self._advance() 3574 3575 end = self._tokens[self._index - 1] 3576 return exp.Hint(expressions=[self._find_sql(start, end)]) 3577 3578 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3579 return self._parse_function_call() 3580 3581 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3582 start_index = self._index 3583 should_fallback_to_string = False 3584 3585 hints = [] 3586 try: 3587 for hint in iter( 3588 lambda: self._parse_csv( 3589 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3590 ), 3591 [], 3592 ): 3593 hints.extend(hint) 3594 except ParseError: 3595 should_fallback_to_string = True 3596 3597 if should_fallback_to_string or self._curr: 3598 self._retreat(start_index) 3599 return self._parse_hint_fallback_to_string() 3600 3601 return self.expression(exp.Hint, expressions=hints) 3602 3603 def _parse_hint(self) -> t.Optional[exp.Hint]: 3604 if self._match(TokenType.HINT) and self._prev_comments: 3605 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3606 3607 return None 3608 3609 def _parse_into(self) -> t.Optional[exp.Into]: 3610 if not self._match(TokenType.INTO): 3611 return None 3612 3613 temp = self._match(TokenType.TEMPORARY) 3614 unlogged = self._match_text_seq("UNLOGGED") 3615 self._match(TokenType.TABLE) 3616 3617 return self.expression( 3618 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3619 ) 3620 3621 def _parse_from( 3622 self, 3623 joins: bool = False, 3624 skip_from_token: bool = False, 3625 consume_pipe: bool = False, 3626 ) -> t.Optional[exp.From]: 3627 if not skip_from_token and not self._match(TokenType.FROM): 3628 return None 3629 3630 return self.expression( 3631 exp.From, 3632 comments=self._prev_comments, 3633 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3634 ) 3635 3636 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3637 return self.expression( 3638 exp.MatchRecognizeMeasure, 3639 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3640 this=self._parse_expression(), 3641 ) 3642 3643 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3644 if not self._match(TokenType.MATCH_RECOGNIZE): 3645 return None 3646 3647 self._match_l_paren() 3648 3649 partition = self._parse_partition_by() 3650 order = self._parse_order() 3651 3652 measures = ( 3653 self._parse_csv(self._parse_match_recognize_measure) 3654 if self._match_text_seq("MEASURES") 3655 else None 3656 ) 3657 3658 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3659 rows = exp.var("ONE ROW PER MATCH") 3660 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3661 text = "ALL ROWS PER MATCH" 3662 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3663 text += " SHOW EMPTY MATCHES" 3664 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3665 text += " OMIT EMPTY MATCHES" 3666 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3667 text += " WITH UNMATCHED ROWS" 3668 rows = exp.var(text) 3669 else: 3670 rows = None 3671 3672 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3673 text = "AFTER MATCH SKIP" 3674 if self._match_text_seq("PAST", "LAST", "ROW"): 3675 text += " PAST LAST ROW" 3676 elif 
self._match_text_seq("TO", "NEXT", "ROW"): 3677 text += " TO NEXT ROW" 3678 elif self._match_text_seq("TO", "FIRST"): 3679 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3680 elif self._match_text_seq("TO", "LAST"): 3681 text += f" TO LAST {self._advance_any().text}" # type: ignore 3682 after = exp.var(text) 3683 else: 3684 after = None 3685 3686 if self._match_text_seq("PATTERN"): 3687 self._match_l_paren() 3688 3689 if not self._curr: 3690 self.raise_error("Expecting )", self._curr) 3691 3692 paren = 1 3693 start = self._curr 3694 3695 while self._curr and paren > 0: 3696 if self._curr.token_type == TokenType.L_PAREN: 3697 paren += 1 3698 if self._curr.token_type == TokenType.R_PAREN: 3699 paren -= 1 3700 3701 end = self._prev 3702 self._advance() 3703 3704 if paren > 0: 3705 self.raise_error("Expecting )", self._curr) 3706 3707 pattern = exp.var(self._find_sql(start, end)) 3708 else: 3709 pattern = None 3710 3711 define = ( 3712 self._parse_csv(self._parse_name_as_expression) 3713 if self._match_text_seq("DEFINE") 3714 else None 3715 ) 3716 3717 self._match_r_paren() 3718 3719 return self.expression( 3720 exp.MatchRecognize, 3721 partition_by=partition, 3722 order=order, 3723 measures=measures, 3724 rows=rows, 3725 after=after, 3726 pattern=pattern, 3727 define=define, 3728 alias=self._parse_table_alias(), 3729 ) 3730 3731 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3732 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3733 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3734 cross_apply = False 3735 3736 if cross_apply is not None: 3737 this = self._parse_select(table=True) 3738 view = None 3739 outer = None 3740 elif self._match(TokenType.LATERAL): 3741 this = self._parse_select(table=True) 3742 view = self._match(TokenType.VIEW) 3743 outer = self._match(TokenType.OUTER) 3744 else: 3745 return None 3746 3747 if not this: 3748 this = ( 3749 self._parse_unnest() 3750 or self._parse_function() 3751 or self._parse_id_var(any_token=False) 3752 ) 3753 3754 while self._match(TokenType.DOT): 3755 this = exp.Dot( 3756 this=this, 3757 expression=self._parse_function() or self._parse_id_var(any_token=False), 3758 ) 3759 3760 ordinality: t.Optional[bool] = None 3761 3762 if view: 3763 table = self._parse_id_var(any_token=False) 3764 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3765 table_alias: t.Optional[exp.TableAlias] = self.expression( 3766 exp.TableAlias, this=table, columns=columns 3767 ) 3768 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3769 # We move the alias from the lateral's child node to the lateral itself 3770 table_alias = this.args["alias"].pop() 3771 else: 3772 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3773 table_alias = self._parse_table_alias() 3774 3775 return self.expression( 3776 exp.Lateral, 3777 this=this, 3778 view=view, 3779 outer=outer, 3780 alias=table_alias, 3781 cross_apply=cross_apply, 3782 ordinality=ordinality, 3783 ) 3784 3785 def _parse_join_parts( 3786 self, 3787 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3788 return ( 3789 self._match_set(self.JOIN_METHODS) and self._prev, 3790 self._match_set(self.JOIN_SIDES) and self._prev, 3791 self._match_set(self.JOIN_KINDS) and self._prev, 3792 ) 3793 3794 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3795 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3796 this = self._parse_column() 3797 if 
isinstance(this, exp.Column): 3798 return this.this 3799 return this 3800 3801 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3802 3803 def _parse_join( 3804 self, skip_join_token: bool = False, parse_bracket: bool = False 3805 ) -> t.Optional[exp.Join]: 3806 if self._match(TokenType.COMMA): 3807 table = self._try_parse(self._parse_table) 3808 cross_join = self.expression(exp.Join, this=table) if table else None 3809 3810 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3811 cross_join.set("kind", "CROSS") 3812 3813 return cross_join 3814 3815 index = self._index 3816 method, side, kind = self._parse_join_parts() 3817 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3818 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3819 join_comments = self._prev_comments 3820 3821 if not skip_join_token and not join: 3822 self._retreat(index) 3823 kind = None 3824 method = None 3825 side = None 3826 3827 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3828 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3829 3830 if not skip_join_token and not join and not outer_apply and not cross_apply: 3831 return None 3832 3833 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3834 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3835 kwargs["expressions"] = self._parse_csv( 3836 lambda: self._parse_table(parse_bracket=parse_bracket) 3837 ) 3838 3839 if method: 3840 kwargs["method"] = method.text 3841 if side: 3842 kwargs["side"] = side.text 3843 if kind: 3844 kwargs["kind"] = kind.text 3845 if hint: 3846 kwargs["hint"] = hint 3847 3848 if self._match(TokenType.MATCH_CONDITION): 3849 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3850 3851 if self._match(TokenType.ON): 3852 kwargs["on"] = self._parse_assignment() 3853 elif self._match(TokenType.USING): 3854 kwargs["using"] = self._parse_using_identifiers() 3855 elif ( 3856 not method 3857 and not (outer_apply or cross_apply) 3858 and not isinstance(kwargs["this"], exp.Unnest) 3859 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3860 ): 3861 index = self._index 3862 joins: t.Optional[list] = list(self._parse_joins()) 3863 3864 if joins and self._match(TokenType.ON): 3865 kwargs["on"] = self._parse_assignment() 3866 elif joins and self._match(TokenType.USING): 3867 kwargs["using"] = self._parse_using_identifiers() 3868 else: 3869 joins = None 3870 self._retreat(index) 3871 3872 kwargs["this"].set("joins", joins if joins else None) 3873 3874 kwargs["pivots"] = self._parse_pivots() 3875 3876 comments = [c for token in (method, side, kind) if token for c in token.comments] 3877 comments = (join_comments or []) + comments 3878 return self.expression(exp.Join, comments=comments, **kwargs) 3879 3880 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3881 this = self._parse_assignment() 3882 3883 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3884 return this 3885 3886 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3887 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3888 3889 return this 3890 3891 def _parse_index_params(self) -> exp.IndexParameters: 3892 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3893 3894 if self._match(TokenType.L_PAREN, advance=False): 3895 columns = 
self._parse_wrapped_csv(self._parse_with_operator) 3896 else: 3897 columns = None 3898 3899 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3900 partition_by = self._parse_partition_by() 3901 with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3902 tablespace = ( 3903 self._parse_var(any_token=True) 3904 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3905 else None 3906 ) 3907 where = self._parse_where() 3908 3909 on = self._parse_field() if self._match(TokenType.ON) else None 3910 3911 return self.expression( 3912 exp.IndexParameters, 3913 using=using, 3914 columns=columns, 3915 include=include, 3916 partition_by=partition_by, 3917 where=where, 3918 with_storage=with_storage, 3919 tablespace=tablespace, 3920 on=on, 3921 ) 3922 3923 def _parse_index( 3924 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3925 ) -> t.Optional[exp.Index]: 3926 if index or anonymous: 3927 unique = None 3928 primary = None 3929 amp = None 3930 3931 self._match(TokenType.ON) 3932 self._match(TokenType.TABLE) # hive 3933 table = self._parse_table_parts(schema=True) 3934 else: 3935 unique = self._match(TokenType.UNIQUE) 3936 primary = self._match_text_seq("PRIMARY") 3937 amp = self._match_text_seq("AMP") 3938 3939 if not self._match(TokenType.INDEX): 3940 return None 3941 3942 index = self._parse_id_var() 3943 table = None 3944 3945 params = self._parse_index_params() 3946 3947 return self.expression( 3948 exp.Index, 3949 this=index, 3950 table=table, 3951 unique=unique, 3952 primary=primary, 3953 amp=amp, 3954 params=params, 3955 ) 3956 3957 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3958 hints: t.List[exp.Expression] = [] 3959 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3960 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3961 hints.append( 3962 self.expression( 3963 exp.WithTableHint, 3964 expressions=self._parse_csv( 3965 lambda: self._parse_function() or self._parse_var(any_token=True) 3966 ), 3967 ) 3968 ) 3969 self._match_r_paren() 3970 else: 3971 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3972 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3973 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3974 3975 self._match_set((TokenType.INDEX, TokenType.KEY)) 3976 if self._match(TokenType.FOR): 3977 hint.set("target", self._advance_any() and self._prev.text.upper()) 3978 3979 hint.set("expressions", self._parse_wrapped_id_vars()) 3980 hints.append(hint) 3981 3982 return hints or None 3983 3984 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3985 return ( 3986 (not schema and self._parse_function(optional_parens=False)) 3987 or self._parse_id_var(any_token=False) 3988 or self._parse_string_as_identifier() 3989 or self._parse_placeholder() 3990 ) 3991 3992 def _parse_table_parts( 3993 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3994 ) -> exp.Table: 3995 catalog = None 3996 db = None 3997 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3998 3999 while self._match(TokenType.DOT): 4000 if catalog: 4001 # This allows nesting the table in arbitrarily many dot expressions if needed 4002 table = self.expression( 4003 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 4004 ) 4005 else: 4006 catalog = db 4007 db = table 4008 # "" used for tsql FROM a..b case 4009 table = 
self._parse_table_part(schema=schema) or "" 4010 4011 if ( 4012 wildcard 4013 and self._is_connected() 4014 and (isinstance(table, exp.Identifier) or not table) 4015 and self._match(TokenType.STAR) 4016 ): 4017 if isinstance(table, exp.Identifier): 4018 table.args["this"] += "*" 4019 else: 4020 table = exp.Identifier(this="*") 4021 4022 # We bubble up comments from the Identifier to the Table 4023 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 4024 4025 if is_db_reference: 4026 catalog = db 4027 db = table 4028 table = None 4029 4030 if not table and not is_db_reference: 4031 self.raise_error(f"Expected table name but got {self._curr}") 4032 if not db and is_db_reference: 4033 self.raise_error(f"Expected database name but got {self._curr}") 4034 4035 table = self.expression( 4036 exp.Table, 4037 comments=comments, 4038 this=table, 4039 db=db, 4040 catalog=catalog, 4041 ) 4042 4043 changes = self._parse_changes() 4044 if changes: 4045 table.set("changes", changes) 4046 4047 at_before = self._parse_historical_data() 4048 if at_before: 4049 table.set("when", at_before) 4050 4051 pivots = self._parse_pivots() 4052 if pivots: 4053 table.set("pivots", pivots) 4054 4055 return table 4056 4057 def _parse_table( 4058 self, 4059 schema: bool = False, 4060 joins: bool = False, 4061 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4062 parse_bracket: bool = False, 4063 is_db_reference: bool = False, 4064 parse_partition: bool = False, 4065 consume_pipe: bool = False, 4066 ) -> t.Optional[exp.Expression]: 4067 lateral = self._parse_lateral() 4068 if lateral: 4069 return lateral 4070 4071 unnest = self._parse_unnest() 4072 if unnest: 4073 return unnest 4074 4075 values = self._parse_derived_table_values() 4076 if values: 4077 return values 4078 4079 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4080 if subquery: 4081 if not subquery.args.get("pivots"): 4082 subquery.set("pivots", self._parse_pivots()) 4083 return subquery 4084 4085 bracket = parse_bracket and self._parse_bracket(None) 4086 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4087 4088 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4089 self._parse_table 4090 ) 4091 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4092 4093 only = self._match(TokenType.ONLY) 4094 4095 this = t.cast( 4096 exp.Expression, 4097 bracket 4098 or rows_from 4099 or self._parse_bracket( 4100 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4101 ), 4102 ) 4103 4104 if only: 4105 this.set("only", only) 4106 4107 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4108 self._match_text_seq("*") 4109 4110 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4111 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4112 this.set("partition", self._parse_partition()) 4113 4114 if schema: 4115 return self._parse_schema(this=this) 4116 4117 version = self._parse_version() 4118 4119 if version: 4120 this.set("version", version) 4121 4122 if self.dialect.ALIAS_POST_TABLESAMPLE: 4123 this.set("sample", self._parse_table_sample()) 4124 4125 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4126 if alias: 4127 this.set("alias", alias) 4128 4129 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4130 return self.expression( 4131 exp.AtIndex, this=this.to_column(copy=False), 
expression=self._parse_id_var() 4132 ) 4133 4134 this.set("hints", self._parse_table_hints()) 4135 4136 if not this.args.get("pivots"): 4137 this.set("pivots", self._parse_pivots()) 4138 4139 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4140 this.set("sample", self._parse_table_sample()) 4141 4142 if joins: 4143 for join in self._parse_joins(): 4144 this.append("joins", join) 4145 4146 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4147 this.set("ordinality", True) 4148 this.set("alias", self._parse_table_alias()) 4149 4150 return this 4151 4152 def _parse_version(self) -> t.Optional[exp.Version]: 4153 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4154 this = "TIMESTAMP" 4155 elif self._match(TokenType.VERSION_SNAPSHOT): 4156 this = "VERSION" 4157 else: 4158 return None 4159 4160 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4161 kind = self._prev.text.upper() 4162 start = self._parse_bitwise() 4163 self._match_texts(("TO", "AND")) 4164 end = self._parse_bitwise() 4165 expression: t.Optional[exp.Expression] = self.expression( 4166 exp.Tuple, expressions=[start, end] 4167 ) 4168 elif self._match_text_seq("CONTAINED", "IN"): 4169 kind = "CONTAINED IN" 4170 expression = self.expression( 4171 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4172 ) 4173 elif self._match(TokenType.ALL): 4174 kind = "ALL" 4175 expression = None 4176 else: 4177 self._match_text_seq("AS", "OF") 4178 kind = "AS OF" 4179 expression = self._parse_type() 4180 4181 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4182 4183 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4184 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4185 index = self._index 4186 historical_data = None 4187 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4188 this = self._prev.text.upper() 4189 kind = ( 4190 self._match(TokenType.L_PAREN) 4191 and self._match_texts(self.HISTORICAL_DATA_KIND) 4192 and self._prev.text.upper() 4193 ) 4194 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4195 4196 if expression: 4197 self._match_r_paren() 4198 historical_data = self.expression( 4199 exp.HistoricalData, this=this, kind=kind, expression=expression 4200 ) 4201 else: 4202 self._retreat(index) 4203 4204 return historical_data 4205 4206 def _parse_changes(self) -> t.Optional[exp.Changes]: 4207 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4208 return None 4209 4210 information = self._parse_var(any_token=True) 4211 self._match_r_paren() 4212 4213 return self.expression( 4214 exp.Changes, 4215 information=information, 4216 at_before=self._parse_historical_data(), 4217 end=self._parse_historical_data(), 4218 ) 4219 4220 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4221 if not self._match(TokenType.UNNEST): 4222 return None 4223 4224 expressions = self._parse_wrapped_csv(self._parse_equality) 4225 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4226 4227 alias = self._parse_table_alias() if with_alias else None 4228 4229 if alias: 4230 if self.dialect.UNNEST_COLUMN_ONLY: 4231 if alias.args.get("columns"): 4232 self.raise_error("Unexpected extra column alias in unnest.") 4233 4234 alias.set("columns", [alias.this]) 4235 alias.set("this", None) 4236 4237 columns = alias.args.get("columns") or [] 4238 if offset and len(expressions) < len(columns): 4239 offset = columns.pop() 4240 4241 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4242 
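# e.g. BigQuery's UNNEST(...) WITH OFFSET [AS alias]; absent an explicit alias, the offset column defaults to "offset"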
self._match(TokenType.ALIAS) 4243 offset = self._parse_id_var( 4244 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4245 ) or exp.to_identifier("offset") 4246 4247 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4248 4249 def _parse_derived_table_values(self) -> t.Optional[exp.Values]: 4250 is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES) 4251 if not is_derived and not ( 4252 # ClickHouse's `FORMAT Values` is equivalent to `VALUES` 4253 self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES") 4254 ): 4255 return None 4256 4257 expressions = self._parse_csv(self._parse_value) 4258 alias = self._parse_table_alias() 4259 4260 if is_derived: 4261 self._match_r_paren() 4262 4263 return self.expression( 4264 exp.Values, expressions=expressions, alias=alias or self._parse_table_alias() 4265 ) 4266 4267 def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]: 4268 if not self._match(TokenType.TABLE_SAMPLE) and not ( 4269 as_modifier and self._match_text_seq("USING", "SAMPLE") 4270 ): 4271 return None 4272 4273 bucket_numerator = None 4274 bucket_denominator = None 4275 bucket_field = None 4276 percent = None 4277 size = None 4278 seed = None 4279 4280 method = self._parse_var(tokens=(TokenType.ROW,), upper=True) 4281 matched_l_paren = self._match(TokenType.L_PAREN) 4282 4283 if self.TABLESAMPLE_CSV: 4284 num = None 4285 expressions = self._parse_csv(self._parse_primary) 4286 else: 4287 expressions = None 4288 num = ( 4289 self._parse_factor() 4290 if self._match(TokenType.NUMBER, advance=False) 4291 else self._parse_primary() or self._parse_placeholder() 4292 ) 4293 4294 if self._match_text_seq("BUCKET"): 4295 bucket_numerator = self._parse_number() 4296 self._match_text_seq("OUT", "OF") 4297 bucket_denominator = self._parse_number() 4298 self._match(TokenType.ON) 4299 bucket_field = self._parse_field() 4300 elif self._match_set((TokenType.PERCENT, TokenType.MOD)): 4301 percent = num 4302 elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT: 4303 size = num 4304 else: 4305 percent = num 4306 4307 if matched_l_paren: 4308 self._match_r_paren() 4309 4310 if self._match(TokenType.L_PAREN): 4311 method = self._parse_var(upper=True) 4312 seed = self._match(TokenType.COMMA) and self._parse_number() 4313 self._match_r_paren() 4314 elif self._match_texts(("SEED", "REPEATABLE")): 4315 seed = self._parse_wrapped(self._parse_number) 4316 4317 if not method and self.DEFAULT_SAMPLING_METHOD: 4318 method = exp.var(self.DEFAULT_SAMPLING_METHOD) 4319 4320 return self.expression( 4321 exp.TableSample, 4322 expressions=expressions, 4323 method=method, 4324 bucket_numerator=bucket_numerator, 4325 bucket_denominator=bucket_denominator, 4326 bucket_field=bucket_field, 4327 percent=percent, 4328 size=size, 4329 seed=seed, 4330 ) 4331 4332 def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]: 4333 return list(iter(self._parse_pivot, None)) or None 4334 4335 def _parse_joins(self) -> t.Iterator[exp.Join]: 4336 return iter(self._parse_join, None) 4337 4338 def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]: 4339 if not self._match(TokenType.INTO): 4340 return None 4341 4342 return self.expression( 4343 exp.UnpivotColumns, 4344 this=self._match_text_seq("NAME") and self._parse_column(), 4345 expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column), 4346 ) 4347 4348 # https://duckdb.org/docs/sql/statements/pivot 4349
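# Illustrative example of the simplified syntax handled below (per DuckDB's docs, linked above):
#   PIVOT cities ON year USING SUM(population) GROUP BY country
# parses into an exp.Pivot whose `this` is the pivoted table, with `expressions`
# holding the ON list, `using` the aggregations and `group` the GROUP BY clause.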
def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot: 4350 def _parse_on() -> t.Optional[exp.Expression]: 4351 this = self._parse_bitwise() 4352 4353 if self._match(TokenType.IN): 4354 # PIVOT ... ON col IN (row_val1, row_val2) 4355 return self._parse_in(this) 4356 if self._match(TokenType.ALIAS, advance=False): 4357 # UNPIVOT ... ON (col1, col2, col3) AS row_val 4358 return self._parse_alias(this) 4359 4360 return this 4361 4362 this = self._parse_table() 4363 expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on) 4364 into = self._parse_unpivot_columns() 4365 using = self._match(TokenType.USING) and self._parse_csv( 4366 lambda: self._parse_alias(self._parse_function()) 4367 ) 4368 group = self._parse_group() 4369 4370 return self.expression( 4371 exp.Pivot, 4372 this=this, 4373 expressions=expressions, 4374 using=using, 4375 group=group, 4376 unpivot=is_unpivot, 4377 into=into, 4378 ) 4379 4380 def _parse_pivot_in(self) -> exp.In: 4381 def _parse_aliased_expression() -> t.Optional[exp.Expression]: 4382 this = self._parse_select_or_expression() 4383 4384 self._match(TokenType.ALIAS) 4385 alias = self._parse_bitwise() 4386 if alias: 4387 if isinstance(alias, exp.Column) and not alias.db: 4388 alias = alias.this 4389 return self.expression(exp.PivotAlias, this=this, alias=alias) 4390 4391 return this 4392 4393 value = self._parse_column() 4394 4395 if not self._match_pair(TokenType.IN, TokenType.L_PAREN): 4396 self.raise_error("Expecting IN (") 4397 4398 if self._match(TokenType.ANY): 4399 exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order())) 4400 else: 4401 exprs = self._parse_csv(_parse_aliased_expression) 4402 4403 self._match_r_paren() 4404 return self.expression(exp.In, this=value, expressions=exprs) 4405 4406 def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]: 4407 func = self._parse_function() 4408 if not func: 4409 self.raise_error("Expecting an aggregation function in PIVOT") 4410 4411 return self._parse_alias(func) 4412 4413 def _parse_pivot(self) -> t.Optional[exp.Pivot]: 4414 index = self._index 4415 include_nulls = None 4416 4417 if self._match(TokenType.PIVOT): 4418 unpivot = False 4419 elif self._match(TokenType.UNPIVOT): 4420 unpivot = True 4421 4422 # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax 4423 if self._match_text_seq("INCLUDE", "NULLS"): 4424 include_nulls = True 4425 elif self._match_text_seq("EXCLUDE", "NULLS"): 4426 include_nulls = False 4427 else: 4428 return None 4429 4430 expressions = [] 4431 4432 if not self._match(TokenType.L_PAREN): 4433 self._retreat(index) 4434 return None 4435 4436 if unpivot: 4437 expressions = self._parse_csv(self._parse_column) 4438 else: 4439 expressions = self._parse_csv(self._parse_pivot_aggregation) 4440 4441 if not expressions: 4442 self.raise_error("Failed to parse PIVOT's aggregation list") 4443 4444 if not self._match(TokenType.FOR): 4445 self.raise_error("Expecting FOR") 4446 4447 fields = [] 4448 while True: 4449 field = self._try_parse(self._parse_pivot_in) 4450 if not field: 4451 break 4452 fields.append(field) 4453 4454 default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped( 4455 self._parse_bitwise 4456 ) 4457 4458 group = self._parse_group() 4459 4460 self._match_r_paren() 4461 4462 pivot = self.expression( 4463 exp.Pivot, 4464 expressions=expressions, 4465 fields=fields, 4466 unpivot=unpivot, 4467 include_nulls=include_nulls, 4468 
default_on_null=default_on_null, 4469 group=group, 4470 ) 4471 4472 if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False): 4473 pivot.set("alias", self._parse_table_alias()) 4474 4475 if not unpivot: 4476 names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions)) 4477 4478 columns: t.List[exp.Expression] = [] 4479 all_fields = [] 4480 for pivot_field in pivot.fields: 4481 pivot_field_expressions = pivot_field.expressions 4482 4483 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case. 4484 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny): 4485 continue 4486 4487 all_fields.append( 4488 [ 4489 fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name 4490 for fld in pivot_field_expressions 4491 ] 4492 ) 4493 4494 if all_fields: 4495 if names: 4496 all_fields.append(names) 4497 4498 # Generate all possible combinations of the pivot columns 4499 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US')) 4500 # generates the product of [[2000, 2010], ['NL', 'US'], ['total']] 4501 for fld_parts_tuple in itertools.product(*all_fields): 4502 fld_parts = list(fld_parts_tuple) 4503 4504 if names and self.PREFIXED_PIVOT_COLUMNS: 4505 # Move the "name" to the front of the list 4506 fld_parts.insert(0, fld_parts.pop(-1)) 4507 4508 columns.append(exp.to_identifier("_".join(fld_parts))) 4509 4510 pivot.set("columns", columns) 4511 4512 return pivot 4513 4514 def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]: 4515 return [agg.alias for agg in aggregations if agg.alias] 4516 4517 def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]: 4518 if not skip_where_token and not self._match(TokenType.PREWHERE): 4519 return None 4520 4521 return self.expression( 4522 exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment() 4523 ) 4524 4525 def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]: 4526 if not skip_where_token and not self._match(TokenType.WHERE): 4527 return None 4528 4529 return self.expression( 4530 exp.Where, comments=self._prev_comments, this=self._parse_assignment() 4531 ) 4532 4533 def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]: 4534 if not skip_group_by_token and not self._match(TokenType.GROUP_BY): 4535 return None 4536 comments = self._prev_comments 4537 4538 elements: t.Dict[str, t.Any] = defaultdict(list) 4539 4540 if self._match(TokenType.ALL): 4541 elements["all"] = True 4542 elif self._match(TokenType.DISTINCT): 4543 elements["all"] = False 4544 4545 if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False): 4546 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4547 4548 while True: 4549 index = self._index 4550 4551 elements["expressions"].extend( 4552 self._parse_csv( 4553 lambda: None 4554 if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False) 4555 else self._parse_assignment() 4556 ) 4557 ) 4558 4559 before_with_index = self._index 4560 with_prefix = self._match(TokenType.WITH) 4561 4562 if self._match(TokenType.ROLLUP): 4563 elements["rollup"].append( 4564 self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix) 4565 ) 4566 elif self._match(TokenType.CUBE): 4567 elements["cube"].append( 4568 self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix) 4569 ) 4570 elif self._match(TokenType.GROUPING_SETS): 4571 elements["grouping_sets"].append( 4572
self.expression( 4573 exp.GroupingSets, 4574 expressions=self._parse_wrapped_csv(self._parse_grouping_set), 4575 ) 4576 ) 4577 elif self._match_text_seq("TOTALS"): 4578 elements["totals"] = True # type: ignore 4579 4580 if before_with_index <= self._index <= before_with_index + 1: 4581 self._retreat(before_with_index) 4582 break 4583 4584 if index == self._index: 4585 break 4586 4587 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4588 4589 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4590 return self.expression( 4591 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4592 ) 4593 4594 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4595 if self._match(TokenType.L_PAREN): 4596 grouping_set = self._parse_csv(self._parse_column) 4597 self._match_r_paren() 4598 return self.expression(exp.Tuple, expressions=grouping_set) 4599 4600 return self._parse_column() 4601 4602 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4603 if not skip_having_token and not self._match(TokenType.HAVING): 4604 return None 4605 return self.expression( 4606 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4607 ) 4608 4609 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4610 if not self._match(TokenType.QUALIFY): 4611 return None 4612 return self.expression(exp.Qualify, this=self._parse_assignment()) 4613 4614 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4615 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4616 exp.Prior, this=self._parse_bitwise() 4617 ) 4618 connect = self._parse_assignment() 4619 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4620 return connect 4621 4622 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4623 if skip_start_token: 4624 start = None 4625 elif self._match(TokenType.START_WITH): 4626 start = self._parse_assignment() 4627 else: 4628 return None 4629 4630 self._match(TokenType.CONNECT_BY) 4631 nocycle = self._match_text_seq("NOCYCLE") 4632 connect = self._parse_connect_with_prior() 4633 4634 if not start and self._match(TokenType.START_WITH): 4635 start = self._parse_assignment() 4636 4637 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4638 4639 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4640 this = self._parse_id_var(any_token=True) 4641 if self._match(TokenType.ALIAS): 4642 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4643 return this 4644 4645 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4646 if self._match_text_seq("INTERPOLATE"): 4647 return self._parse_wrapped_csv(self._parse_name_as_expression) 4648 return None 4649 4650 def _parse_order( 4651 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4652 ) -> t.Optional[exp.Expression]: 4653 siblings = None 4654 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4655 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4656 return this 4657 4658 siblings = True 4659 4660 return self.expression( 4661 exp.Order, 4662 comments=self._prev_comments, 4663 this=this, 4664 expressions=self._parse_csv(self._parse_ordered), 4665 siblings=siblings, 4666 ) 4667 4668 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4669 if not self._match(token): 4670 return None 4671 return self.expression(exp_class, 
expressions=self._parse_csv(self._parse_ordered)) 4672 4673 def _parse_ordered( 4674 self, parse_method: t.Optional[t.Callable] = None 4675 ) -> t.Optional[exp.Ordered]: 4676 this = parse_method() if parse_method else self._parse_assignment() 4677 if not this: 4678 return None 4679 4680 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4681 this = exp.var("ALL") 4682 4683 asc = self._match(TokenType.ASC) 4684 desc = self._match(TokenType.DESC) or (asc and False) 4685 4686 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4687 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4688 4689 nulls_first = is_nulls_first or False 4690 explicitly_null_ordered = is_nulls_first or is_nulls_last 4691 4692 if ( 4693 not explicitly_null_ordered 4694 and ( 4695 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4696 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4697 ) 4698 and self.dialect.NULL_ORDERING != "nulls_are_last" 4699 ): 4700 nulls_first = True 4701 4702 if self._match_text_seq("WITH", "FILL"): 4703 with_fill = self.expression( 4704 exp.WithFill, 4705 **{ # type: ignore 4706 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4707 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4708 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4709 "interpolate": self._parse_interpolate(), 4710 }, 4711 ) 4712 else: 4713 with_fill = None 4714 4715 return self.expression( 4716 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4717 ) 4718 4719 def _parse_limit_options(self) -> exp.LimitOptions: 4720 percent = self._match(TokenType.PERCENT) 4721 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4722 self._match_text_seq("ONLY") 4723 with_ties = self._match_text_seq("WITH", "TIES") 4724 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4725 4726 def _parse_limit( 4727 self, 4728 this: t.Optional[exp.Expression] = None, 4729 top: bool = False, 4730 skip_limit_token: bool = False, 4731 ) -> t.Optional[exp.Expression]: 4732 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4733 comments = self._prev_comments 4734 if top: 4735 limit_paren = self._match(TokenType.L_PAREN) 4736 expression = self._parse_term() if limit_paren else self._parse_number() 4737 4738 if limit_paren: 4739 self._match_r_paren() 4740 4741 limit_options = self._parse_limit_options() 4742 else: 4743 limit_options = None 4744 expression = self._parse_term() 4745 4746 if self._match(TokenType.COMMA): 4747 offset = expression 4748 expression = self._parse_term() 4749 else: 4750 offset = None 4751 4752 limit_exp = self.expression( 4753 exp.Limit, 4754 this=this, 4755 expression=expression, 4756 offset=offset, 4757 comments=comments, 4758 limit_options=limit_options, 4759 expressions=self._parse_limit_by(), 4760 ) 4761 4762 return limit_exp 4763 4764 if self._match(TokenType.FETCH): 4765 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4766 direction = self._prev.text.upper() if direction else "FIRST" 4767 4768 count = self._parse_field(tokens=self.FETCH_TOKENS) 4769 4770 return self.expression( 4771 exp.Fetch, 4772 direction=direction, 4773 count=count, 4774 limit_options=self._parse_limit_options(), 4775 ) 4776 4777 return this 4778 4779 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4780 if not self._match(TokenType.OFFSET): 4781 return this 4782 4783 count = self._parse_term() 4784 
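# Consume the optional ROW / ROWS noise keyword, as in the ANSI form OFFSET 10 ROWS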
self._match_set((TokenType.ROW, TokenType.ROWS)) 4785 4786 return self.expression( 4787 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4788 ) 4789 4790 def _can_parse_limit_or_offset(self) -> bool: 4791 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4792 return False 4793 4794 index = self._index 4795 result = bool( 4796 self._try_parse(self._parse_limit, retreat=True) 4797 or self._try_parse(self._parse_offset, retreat=True) 4798 ) 4799 self._retreat(index) 4800 return result 4801 4802 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4803 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4804 4805 def _parse_locks(self) -> t.List[exp.Lock]: 4806 locks = [] 4807 while True: 4808 update, key = None, None 4809 if self._match_text_seq("FOR", "UPDATE"): 4810 update = True 4811 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4812 "LOCK", "IN", "SHARE", "MODE" 4813 ): 4814 update = False 4815 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4816 update, key = False, True 4817 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4818 update, key = True, True 4819 else: 4820 break 4821 4822 expressions = None 4823 if self._match_text_seq("OF"): 4824 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4825 4826 wait: t.Optional[bool | exp.Expression] = None 4827 if self._match_text_seq("NOWAIT"): 4828 wait = True 4829 elif self._match_text_seq("WAIT"): 4830 wait = self._parse_primary() 4831 elif self._match_text_seq("SKIP", "LOCKED"): 4832 wait = False 4833 4834 locks.append( 4835 self.expression( 4836 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4837 ) 4838 ) 4839 4840 return locks 4841 4842 def parse_set_operation( 4843 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4844 ) -> t.Optional[exp.Expression]: 4845 start = self._index 4846 _, side_token, kind_token = self._parse_join_parts() 4847 4848 side = side_token.text if side_token else None 4849 kind = kind_token.text if kind_token else None 4850 4851 if not self._match_set(self.SET_OPERATIONS): 4852 self._retreat(start) 4853 return None 4854 4855 token_type = self._prev.token_type 4856 4857 if token_type == TokenType.UNION: 4858 operation: t.Type[exp.SetOperation] = exp.Union 4859 elif token_type == TokenType.EXCEPT: 4860 operation = exp.Except 4861 else: 4862 operation = exp.Intersect 4863 4864 comments = self._prev.comments 4865 4866 if self._match(TokenType.DISTINCT): 4867 distinct: t.Optional[bool] = True 4868 elif self._match(TokenType.ALL): 4869 distinct = False 4870 else: 4871 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4872 if distinct is None: 4873 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4874 4875 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4876 "STRICT", "CORRESPONDING" 4877 ) 4878 if self._match_text_seq("CORRESPONDING"): 4879 by_name = True 4880 if not side and not kind: 4881 kind = "INNER" 4882 4883 on_column_list = None 4884 if by_name and self._match_texts(("ON", "BY")): 4885 on_column_list = self._parse_wrapped_csv(self._parse_column) 4886 4887 expression = self._parse_select( 4888 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4889 ) 4890 4891 return self.expression( 4892 operation, 4893 comments=comments, 4894 this=this, 4895 distinct=distinct, 4896 by_name=by_name, 4897 expression=expression, 4898 side=side, 4899 kind=kind, 4900 on=on_column_list, 4901 ) 4902 
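# Illustrative usage (assumes the public sqlglot API): set operations parsed by
# parse_set_operation surface as exp.Union / exp.Except / exp.Intersect nodes, e.g.
#   >>> import sqlglot
#   >>> sqlglot.parse_one("SELECT 1 UNION ALL SELECT 2").args["distinct"]
#   False
# while a plain UNION yields distinct=True under the default dialect settings.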
4903 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4904 while this: 4905 setop = self.parse_set_operation(this) 4906 if not setop: 4907 break 4908 this = setop 4909 4910 if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4911 expression = this.expression 4912 4913 if expression: 4914 for arg in self.SET_OP_MODIFIERS: 4915 expr = expression.args.get(arg) 4916 if expr: 4917 this.set(arg, expr.pop()) 4918 4919 return this 4920 4921 def _parse_expression(self) -> t.Optional[exp.Expression]: 4922 return self._parse_alias(self._parse_assignment()) 4923 4924 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4925 this = self._parse_disjunction() 4926 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4927 # This allows us to parse <non-identifier token> := <expr> 4928 this = exp.column( 4929 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4930 ) 4931 4932 while self._match_set(self.ASSIGNMENT): 4933 if isinstance(this, exp.Column) and len(this.parts) == 1: 4934 this = this.this 4935 4936 this = self.expression( 4937 self.ASSIGNMENT[self._prev.token_type], 4938 this=this, 4939 comments=self._prev_comments, 4940 expression=self._parse_assignment(), 4941 ) 4942 4943 return this 4944 4945 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4946 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4947 4948 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4949 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4950 4951 def _parse_equality(self) -> t.Optional[exp.Expression]: 4952 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4953 4954 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4955 return self._parse_tokens(self._parse_range, self.COMPARISON) 4956 4957 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4958 this = this or self._parse_bitwise() 4959 negate = self._match(TokenType.NOT) 4960 4961 if self._match_set(self.RANGE_PARSERS): 4962 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4963 if not expression: 4964 return this 4965 4966 this = expression 4967 elif self._match(TokenType.ISNULL): 4968 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4969 4970 # Postgres supports ISNULL and NOTNULL for conditions. 
4971 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4972 if self._match(TokenType.NOTNULL): 4973 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4974 this = self.expression(exp.Not, this=this) 4975 4976 if negate: 4977 this = self._negate_range(this) 4978 4979 if self._match(TokenType.IS): 4980 this = self._parse_is(this) 4981 4982 return this 4983 4984 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4985 if not this: 4986 return this 4987 4988 return self.expression(exp.Not, this=this) 4989 4990 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4991 index = self._index - 1 4992 negate = self._match(TokenType.NOT) 4993 4994 if self._match_text_seq("DISTINCT", "FROM"): 4995 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4996 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4997 4998 if self._match(TokenType.JSON): 4999 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5000 5001 if self._match_text_seq("WITH"): 5002 _with = True 5003 elif self._match_text_seq("WITHOUT"): 5004 _with = False 5005 else: 5006 _with = None 5007 5008 unique = self._match(TokenType.UNIQUE) 5009 self._match_text_seq("KEYS") 5010 expression: t.Optional[exp.Expression] = self.expression( 5011 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5012 ) 5013 else: 5014 expression = self._parse_primary() or self._parse_null() 5015 if not expression: 5016 self._retreat(index) 5017 return None 5018 5019 this = self.expression(exp.Is, this=this, expression=expression) 5020 return self.expression(exp.Not, this=this) if negate else this 5021 5022 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5023 unnest = self._parse_unnest(with_alias=False) 5024 if unnest: 5025 this = self.expression(exp.In, this=this, unnest=unnest) 5026 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5027 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5028 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5029 5030 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5031 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5032 else: 5033 this = self.expression(exp.In, this=this, expressions=expressions) 5034 5035 if matched_l_paren: 5036 self._match_r_paren(this) 5037 elif not self._match(TokenType.R_BRACKET, expression=this): 5038 self.raise_error("Expecting ]") 5039 else: 5040 this = self.expression(exp.In, this=this, field=self._parse_column()) 5041 5042 return this 5043 5044 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5045 symmetric = None 5046 if self._match_text_seq("SYMMETRIC"): 5047 symmetric = True 5048 elif self._match_text_seq("ASYMMETRIC"): 5049 symmetric = False 5050 5051 low = self._parse_bitwise() 5052 self._match(TokenType.AND) 5053 high = self._parse_bitwise() 5054 5055 return self.expression( 5056 exp.Between, 5057 this=this, 5058 low=low, 5059 high=high, 5060 symmetric=symmetric, 5061 ) 5062 5063 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5064 if not self._match(TokenType.ESCAPE): 5065 return this 5066 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5067 5068 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5069 index = self._index 5070 5071 if not 
self._match(TokenType.INTERVAL) and match_interval: 5072 return None 5073 5074 if self._match(TokenType.STRING, advance=False): 5075 this = self._parse_primary() 5076 else: 5077 this = self._parse_term() 5078 5079 if not this or ( 5080 isinstance(this, exp.Column) 5081 and not this.table 5082 and not this.this.quoted 5083 and this.name.upper() == "IS" 5084 ): 5085 self._retreat(index) 5086 return None 5087 5088 unit = self._parse_function() or ( 5089 not self._match(TokenType.ALIAS, advance=False) 5090 and self._parse_var(any_token=True, upper=True) 5091 ) 5092 5093 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5094 # each INTERVAL expression into this canonical form so it's easy to transpile 5095 if this and this.is_number: 5096 this = exp.Literal.string(this.to_py()) 5097 elif this and this.is_string: 5098 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5099 if parts and unit: 5100 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5101 unit = None 5102 self._retreat(self._index - 1) 5103 5104 if len(parts) == 1: 5105 this = exp.Literal.string(parts[0][0]) 5106 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5107 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5108 unit = self.expression( 5109 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5110 ) 5111 5112 interval = self.expression(exp.Interval, this=this, unit=unit) 5113 5114 index = self._index 5115 self._match(TokenType.PLUS) 5116 5117 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5118 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5119 return self.expression( 5120 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5121 ) 5122 5123 self._retreat(index) 5124 return interval 5125 5126 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5127 this = self._parse_term() 5128 5129 while True: 5130 if self._match_set(self.BITWISE): 5131 this = self.expression( 5132 self.BITWISE[self._prev.token_type], 5133 this=this, 5134 expression=self._parse_term(), 5135 ) 5136 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5137 this = self.expression( 5138 exp.DPipe, 5139 this=this, 5140 expression=self._parse_term(), 5141 safe=not self.dialect.STRICT_STRING_CONCAT, 5142 ) 5143 elif self._match(TokenType.DQMARK): 5144 this = self.expression( 5145 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5146 ) 5147 elif self._match_pair(TokenType.LT, TokenType.LT): 5148 this = self.expression( 5149 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5150 ) 5151 elif self._match_pair(TokenType.GT, TokenType.GT): 5152 this = self.expression( 5153 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5154 ) 5155 else: 5156 break 5157 5158 return this 5159 5160 def _parse_term(self) -> t.Optional[exp.Expression]: 5161 this = self._parse_factor() 5162 5163 while self._match_set(self.TERM): 5164 klass = self.TERM[self._prev.token_type] 5165 comments = self._prev_comments 5166 expression = self._parse_factor() 5167 5168 this = self.expression(klass, this=this, comments=comments, expression=expression) 5169 5170 if isinstance(this, exp.Collate): 5171 expr = this.expression 5172 5173 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5174 # fallback to Identifier / Var 5175 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5176 ident = expr.this 5177 if 
    def _parse_factor(self) -> t.Optional[exp.Expression]:
        parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary
        this = parse_method()

        while self._match_set(self.FACTOR):
            klass = self.FACTOR[self._prev.token_type]
            comments = self._prev_comments
            expression = parse_method()

            if not expression and klass is exp.IntDiv and self._prev.text.isalpha():
                self._retreat(self._index - 1)
                return this

            this = self.expression(klass, this=this, comments=comments, expression=expression)

            if isinstance(this, exp.Div):
                this.args["typed"] = self.dialect.TYPED_DIVISION
                this.args["safe"] = self.dialect.SAFE_DIVISION

        return this

    def _parse_exponent(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_unary, self.EXPONENT)

    def _parse_unary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.UNARY_PARSERS):
            return self.UNARY_PARSERS[self._prev.token_type](self)
        return self._parse_at_time_zone(self._parse_type())

    def _parse_type(
        self, parse_interval: bool = True, fallback_to_identifier: bool = False
    ) -> t.Optional[exp.Expression]:
        interval = parse_interval and self._parse_interval()
        if interval:
            return interval

        index = self._index
        data_type = self._parse_types(check_func=True, allow_identifiers=False)

        # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
        # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
        if isinstance(data_type, exp.Cast):
            # This constructor can contain ops directly after it, for instance struct unnesting:
            # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).*
            return self._parse_column_ops(data_type)

        if data_type:
            index2 = self._index
            this = self._parse_primary()

            if isinstance(this, exp.Literal):
                literal = this.name
                this = self._parse_column_ops(this)

                parser = self.TYPE_LITERAL_PARSERS.get(data_type.this)
                if parser:
                    return parser(self, this, data_type)

                if (
                    self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
                    and data_type.is_type(exp.DataType.Type.TIMESTAMP)
                    and TIME_ZONE_RE.search(literal)
                ):
                    data_type = exp.DataType.build("TIMESTAMPTZ")

                return self.expression(exp.Cast, this=this, to=data_type)

            # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0)
            # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 )
            #
            # If the index difference here is greater than 1, that means the parser itself must have
            # consumed additional tokens such as the DECIMAL scale and precision in the above example.
            #
            # If it's not greater than 1, then it must be 1, because we've consumed at least the type
            # keyword, meaning that the expressions arg of the DataType must have gotten set by a
            # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to
            # DECIMAL(38, 0) in order to facilitate the data type's transpilation.
            #
            # In these cases, we don't really want to return the converted type, but instead retreat
            # and try to parse a Column or Identifier in the section below.
            if data_type.expressions and index2 - index > 1:
                self._retreat(index2)
                return self._parse_column_ops(data_type)

        self._retreat(index)

        if fallback_to_identifier:
            return self._parse_id_var()

        this = self._parse_column()
        return this and self._parse_column_ops(this)

    def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]:
        this = self._parse_type()
        if not this:
            return None

        if isinstance(this, exp.Column) and not this.table:
            this = exp.var(this.name.upper())

        return self.expression(
            exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True)
        )

    def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]:
        type_name = identifier.name

        while self._match(TokenType.DOT):
            type_name = f"{type_name}.{self._advance_any() and self._prev.text}"

        return exp.DataType.build(type_name, dialect=self.dialect, udt=True)
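
    # Parses a (possibly nested) data type into an exp.DataType, e.g. DECIMAL(38, 0),
    # STRUCT<a INT>, MAP[TEXT => INT] (Materialize) or INT[3] (Postgres). check_func
    # guards against misreading function calls such as DATE(x) as types, and
    # allow_identifiers lets identifier-like names be re-tokenized as types or
    # resolved as user-defined types. A hedged sketch of the outcome:
    #
    #     >>> sqlglot.parse_one("CAST(x AS DECIMAL(38, 0))").to.sql()
    #     'DECIMAL(38, 0)'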
    def _parse_types(
        self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True
    ) -> t.Optional[exp.Expression]:
        index = self._index

        this: t.Optional[exp.Expression] = None
        prefix = self._match_text_seq("SYSUDTLIB", ".")

        if self._match_set(self.TYPE_TOKENS):
            type_token = self._prev.token_type
        else:
            type_token = None
            identifier = allow_identifiers and self._parse_id_var(
                any_token=False, tokens=(TokenType.VAR,)
            )
            if isinstance(identifier, exp.Identifier):
                try:
                    tokens = self.dialect.tokenize(identifier.name)
                except TokenError:
                    tokens = None

                if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS:
                    type_token = tokens[0].token_type
                elif self.dialect.SUPPORTS_USER_DEFINED_TYPES:
                    this = self._parse_user_defined_type(identifier)
                else:
                    self._retreat(self._index - 1)
                    return None
            else:
                return None

        if type_token == TokenType.PSEUDO_TYPE:
            return self.expression(exp.PseudoType, this=self._prev.text.upper())

        if type_token == TokenType.OBJECT_IDENTIFIER:
            return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper())

        # https://materialize.com/docs/sql/types/map/
        if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET):
            key_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.FARROW):
                self._retreat(index)
                return None

            value_type = self._parse_types(
                check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
            )
            if not self._match(TokenType.R_BRACKET):
                self._retreat(index)
                return None

            return exp.DataType(
                this=exp.DataType.Type.MAP,
                expressions=[key_type, value_type],
                nested=True,
                prefix=prefix,
            )

        nested = type_token in self.NESTED_TYPE_TOKENS
        is_struct = type_token in self.STRUCT_TYPE_TOKENS
        is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS
        expressions = None
        maybe_func = False

        if self._match(TokenType.L_PAREN):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            elif nested:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )
                if type_token == TokenType.NULLABLE and len(expressions) == 1:
                    this = expressions[0]
                    this.set("nullable", True)
                    self._match_r_paren()
                    return this
            elif type_token in self.ENUM_TYPE_TOKENS:
                expressions = self._parse_csv(self._parse_equality)
            elif is_aggregate:
                func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var(
                    any_token=False, tokens=(TokenType.VAR, TokenType.ANY)
                )
                if not func_or_ident:
                    return None
                expressions = [func_or_ident]
                if self._match(TokenType.COMMA):
                    expressions.extend(
                        self._parse_csv(
                            lambda: self._parse_types(
                                check_func=check_func,
                                schema=schema,
                                allow_identifiers=allow_identifiers,
                            )
                        )
                    )
            else:
                expressions = self._parse_csv(self._parse_type_size)

                # https://docs.snowflake.com/en/sql-reference/data-types-vector
                if type_token == TokenType.VECTOR and len(expressions) == 2:
                    expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect)

            if not expressions or not self._match(TokenType.R_PAREN):
                self._retreat(index)
                return None

            maybe_func = True

        values: t.Optional[t.List[exp.Expression]] = None

        if nested and self._match(TokenType.LT):
            if is_struct:
                expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True))
            else:
                expressions = self._parse_csv(
                    lambda: self._parse_types(
                        check_func=check_func, schema=schema, allow_identifiers=allow_identifiers
                    )
                )

            if not self._match(TokenType.GT):
                self.raise_error("Expecting >")

            if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)):
                values = self._parse_csv(self._parse_assignment)
                if not values and is_struct:
                    values = None
                    self._retreat(self._index - 1)
                else:
                    self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN))

        if type_token in self.TIMESTAMPS:
            if self._match_text_seq("WITH", "TIME", "ZONE"):
                maybe_func = False
                tz_type = (
                    exp.DataType.Type.TIMETZ
                    if type_token in self.TIMES
                    else exp.DataType.Type.TIMESTAMPTZ
                )
                this = exp.DataType(this=tz_type, expressions=expressions)
            elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"):
                maybe_func = False
                this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions)
            elif self._match_text_seq("WITHOUT", "TIME", "ZONE"):
                maybe_func = False
        elif type_token == TokenType.INTERVAL:
            unit = self._parse_var(upper=True)
            if unit:
                if self._match_text_seq("TO"):
                    unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True))

                this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit))
            else:
                this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL)
        elif type_token == TokenType.VOID:
            this = exp.DataType(this=exp.DataType.Type.NULL)

        if maybe_func and check_func:
            index2 = self._index
            peek = self._parse_string()

            if not peek:
                self._retreat(index)
                return None

            self._retreat(index2)

        if not this:
            if self._match_text_seq("UNSIGNED"):
                unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token)
                if not unsigned_type_token:
                    self.raise_error(f"Cannot convert {type_token.value} to unsigned.")

                type_token = unsigned_type_token or type_token

            this = exp.DataType(
                this=exp.DataType.Type[type_token.value],
                expressions=expressions,
                nested=nested,
                prefix=prefix,
            )

            # Empty arrays/structs are allowed
            if values is not None:
                cls = exp.Struct if is_struct else exp.Array
                this = exp.cast(cls(expressions=values), this, copy=False)

        elif expressions:
            this.set("expressions", expressions)

        # https://materialize.com/docs/sql/types/list/#type-name
        while self._match(TokenType.LIST):
            this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True)

        index = self._index

        # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3]
        matched_array = self._match(TokenType.ARRAY)

        while self._curr:
            datatype_token = self._prev.token_type
            matched_l_bracket = self._match(TokenType.L_BRACKET)

            if (not matched_l_bracket and not matched_array) or (
                datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET)
            ):
                # Postgres allows casting empty arrays such as ARRAY[]::INT[],
                # not to be confused with the fixed size array parsing
                break

            matched_array = False
            values = self._parse_csv(self._parse_assignment) or None
            if (
                values
                and not schema
                and (
                    not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY
                )
            ):
                # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB
                # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type
                self._retreat(index)
                break

            this = exp.DataType(
                this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True
            )
            self._match(TokenType.R_BRACKET)

        if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type):
            converter = self.TYPE_CONVERTERS.get(this.this)
            if converter:
                this = converter(t.cast(exp.DataType, this))

        return this
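
    # Parses one field of a struct type's field list. Some dialects separate the
    # field name from its type with a colon (e.g. Hive's STRUCT<a: INT>), hence
    # the optional COLON match; the field itself is parsed as a column def so any
    # constraints stay attached to it.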
    def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if (
            self._curr
            and self._next
            and self._curr.token_type in self.TYPE_TOKENS
            and self._next.token_type in self.TYPE_TOKENS
        ):
            # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a
            # type token. Without this, the list will be parsed as a type and we'll eventually crash
            this = self._parse_id_var()
        else:
            this = (
                self._parse_type(parse_interval=False, fallback_to_identifier=True)
                or self._parse_id_var()
            )

        self._match(TokenType.COLON)

        if (
            type_required
            and not isinstance(this, exp.DataType)
            and not self._match_set(self.TYPE_TOKENS, advance=False)
        ):
            self._retreat(index)
            return self._parse_types()

        return self._parse_column_def(this)

    def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("AT", "TIME", "ZONE"):
            return this
        return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary())

    def _parse_column(self) -> t.Optional[exp.Expression]:
        this = self._parse_column_reference()
        column = self._parse_column_ops(this) if this else self._parse_bracket(this)

        if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column:
            column.set("join_mark", self._match(TokenType.JOIN_MARKER))

        return column

    def _parse_column_reference(self) -> t.Optional[exp.Expression]:
        this = self._parse_field()
        if (
            not this
            and self._match(TokenType.VALUES, advance=False)
            and self.VALUES_FOLLOWED_BY_PAREN
            and (not self._next or self._next.token_type != TokenType.L_PAREN)
        ):
            this = self._parse_id_var()

        if isinstance(this, exp.Identifier):
            # We bubble up comments from the Identifier to the Column
            this = self.expression(exp.Column, comments=this.pop_comments(), this=this)

        return this
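
    # Canonicalizes Snowflake / Databricks VARIANT access such as col:a.b::INT
    # into a JSONExtract (GET_PATH-style) expression, re-attaching trailing casts
    # so they apply to the extracted value rather than to the JSON path itself.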
col:"a'b") as 5627 # it'll roundtrip to a string literal in GET_PATH 5628 if isinstance(path, exp.Identifier) and path.quoted: 5629 escape = True 5630 5631 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5632 5633 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5634 # Databricks transforms it back to the colon/dot notation 5635 if json_path: 5636 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5637 5638 if json_path_expr: 5639 json_path_expr.set("escape", escape) 5640 5641 this = self.expression( 5642 exp.JSONExtract, 5643 this=this, 5644 expression=json_path_expr, 5645 variant_extract=True, 5646 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5647 ) 5648 5649 while casts: 5650 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5651 5652 return this 5653 5654 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5655 return self._parse_types() 5656 5657 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5658 this = self._parse_bracket(this) 5659 5660 while self._match_set(self.COLUMN_OPERATORS): 5661 op_token = self._prev.token_type 5662 op = self.COLUMN_OPERATORS.get(op_token) 5663 5664 if op_token in self.CAST_COLUMN_OPERATORS: 5665 field = self._parse_dcolon() 5666 if not field: 5667 self.raise_error("Expected type") 5668 elif op and self._curr: 5669 field = self._parse_column_reference() or self._parse_bracket() 5670 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5671 field = self._parse_column_ops(field) 5672 else: 5673 field = self._parse_field(any_token=True, anonymous_func=True) 5674 5675 # Function calls can be qualified, e.g., x.y.FOO() 5676 # This converts the final AST to a series of Dots leading to the function call 5677 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5678 if isinstance(field, (exp.Func, exp.Window)) and this: 5679 this = this.transform( 5680 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5681 ) 5682 5683 if op: 5684 this = op(self, this, field) 5685 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5686 this = self.expression( 5687 exp.Column, 5688 comments=this.comments, 5689 this=field, 5690 table=this.this, 5691 db=this.args.get("table"), 5692 catalog=this.args.get("db"), 5693 ) 5694 elif isinstance(field, exp.Window): 5695 # Move the exp.Dot's to the window's function 5696 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5697 field.set("this", window_func) 5698 this = field 5699 else: 5700 this = self.expression(exp.Dot, this=this, expression=field) 5701 5702 if field and field.comments: 5703 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5704 5705 this = self._parse_bracket(this) 5706 5707 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5708 5709 def _parse_paren(self) -> t.Optional[exp.Expression]: 5710 if not self._match(TokenType.L_PAREN): 5711 return None 5712 5713 comments = self._prev_comments 5714 query = self._parse_select() 5715 5716 if query: 5717 expressions = [query] 5718 else: 5719 expressions = self._parse_expressions() 5720 5721 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5722 5723 if not this and self._match(TokenType.R_PAREN, advance=False): 5724 this = self.expression(exp.Tuple) 5725 elif isinstance(this, 
    def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        this = self._parse_bracket(this)

        while self._match_set(self.COLUMN_OPERATORS):
            op_token = self._prev.token_type
            op = self.COLUMN_OPERATORS.get(op_token)

            if op_token in self.CAST_COLUMN_OPERATORS:
                field = self._parse_dcolon()
                if not field:
                    self.raise_error("Expected type")
            elif op and self._curr:
                field = self._parse_column_reference() or self._parse_bracket()
                if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False):
                    field = self._parse_column_ops(field)
            else:
                field = self._parse_field(any_token=True, anonymous_func=True)

            # Function calls can be qualified, e.g., x.y.FOO()
            # This converts the final AST to a series of Dots leading to the function call
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules
            if isinstance(field, (exp.Func, exp.Window)) and this:
                this = this.transform(
                    lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n
                )

            if op:
                this = op(self, this, field)
            elif isinstance(this, exp.Column) and not this.args.get("catalog"):
                this = self.expression(
                    exp.Column,
                    comments=this.comments,
                    this=field,
                    table=this.this,
                    db=this.args.get("table"),
                    catalog=this.args.get("db"),
                )
            elif isinstance(field, exp.Window):
                # Move the exp.Dot's to the window's function
                window_func = self.expression(exp.Dot, this=this, expression=field.this)
                field.set("this", window_func)
                this = field
            else:
                this = self.expression(exp.Dot, this=this, expression=field)

            if field and field.comments:
                t.cast(exp.Expression, this).add_comments(field.pop_comments())

            this = self._parse_bracket(this)

        return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this

    def _parse_paren(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN):
            return None

        comments = self._prev_comments
        query = self._parse_select()

        if query:
            expressions = [query]
        else:
            expressions = self._parse_expressions()

        this = self._parse_query_modifiers(seq_get(expressions, 0))

        if not this and self._match(TokenType.R_PAREN, advance=False):
            this = self.expression(exp.Tuple)
        elif isinstance(this, exp.UNWRAPPED_QUERIES):
            this = self._parse_subquery(this=this, parse_alias=False)
        elif isinstance(this, exp.Subquery):
            this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False)
        elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA:
            this = self.expression(exp.Tuple, expressions=expressions)
        else:
            this = self.expression(exp.Paren, this=this)

        if this:
            this.add_comments(comments)

        self._match_r_paren(expression=this)
        return this

    def _parse_primary(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PRIMARY_PARSERS):
            token_type = self._prev.token_type
            primary = self.PRIMARY_PARSERS[token_type](self, self._prev)

            if token_type == TokenType.STRING:
                expressions = [primary]
                while self._match(TokenType.STRING):
                    expressions.append(exp.Literal.string(self._prev.text))

                if len(expressions) > 1:
                    return self.expression(exp.Concat, expressions=expressions)

            return primary

        if self._match_pair(TokenType.DOT, TokenType.NUMBER):
            return exp.Literal.number(f"0.{self._prev.text}")

        return self._parse_paren()

    def _parse_field(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        anonymous_func: bool = False,
    ) -> t.Optional[exp.Expression]:
        if anonymous_func:
            field = (
                self._parse_function(anonymous=anonymous_func, any_token=any_token)
                or self._parse_primary()
            )
        else:
            field = self._parse_primary() or self._parse_function(
                anonymous=anonymous_func, any_token=any_token
            )
        return field or self._parse_id_var(any_token=any_token, tokens=tokens)

    def _parse_function(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this)
        # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences
        fn_syntax = False
        if (
            self._match(TokenType.L_BRACE, advance=False)
            and self._next
            and self._next.text.upper() == "FN"
        ):
            self._advance(2)
            fn_syntax = True

        func = self._parse_function_call(
            functions=functions,
            anonymous=anonymous,
            optional_parens=optional_parens,
            any_token=any_token,
        )

        if fn_syntax:
            self._match(TokenType.R_BRACE)

        return func
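
    # Function-call resolution order: parenthesis-less parsers and keywords
    # (NO_PAREN_FUNCTION_PARSERS / NO_PAREN_FUNCTIONS), special-cased syntax in
    # FUNCTION_PARSERS, subquery predicates such as EXISTS, then the known
    # FUNCTIONS builders; anything left over becomes an exp.Anonymous call.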
    def _parse_function_call(
        self,
        functions: t.Optional[t.Dict[str, t.Callable]] = None,
        anonymous: bool = False,
        optional_parens: bool = True,
        any_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if not self._curr:
            return None

        comments = self._curr.comments
        prev = self._prev
        token = self._curr
        token_type = self._curr.token_type
        this = self._curr.text
        upper = this.upper()

        parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper)
        if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS:
            self._advance()
            return self._parse_window(parser(self))

        if not self._next or self._next.token_type != TokenType.L_PAREN:
            if optional_parens and token_type in self.NO_PAREN_FUNCTIONS:
                self._advance()
                return self.expression(self.NO_PAREN_FUNCTIONS[token_type])

            return None

        if any_token:
            if token_type in self.RESERVED_TOKENS:
                return None
        elif token_type not in self.FUNC_TOKENS:
            return None

        self._advance(2)

        parser = self.FUNCTION_PARSERS.get(upper)
        if parser and not anonymous:
            this = parser(self)
        else:
            subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type)

            if subquery_predicate:
                expr = None
                if self._curr.token_type in (TokenType.SELECT, TokenType.WITH):
                    expr = self._parse_select()
                    self._match_r_paren()
                elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE):
                    # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like
                    # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren
                    self._advance(-1)
                    expr = self._parse_bitwise()

                if expr:
                    return self.expression(subquery_predicate, comments=comments, this=expr)

            if functions is None:
                functions = self.FUNCTIONS

            function = functions.get(upper)
            known_function = function and not anonymous

            alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS
            args = self._parse_csv(lambda: self._parse_lambda(alias=alias))

            post_func_comments = self._curr and self._curr.comments
            if known_function and post_func_comments:
                # If the user-inputted comment "/* sqlglot.anonymous */" is following the function
                # call we'll construct it as exp.Anonymous, even if it's "known"
                if any(
                    comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS)
                    for comment in post_func_comments
                ):
                    known_function = False

            if alias and known_function:
                args = self._kv_to_prop_eq(args)

            if known_function:
                func_builder = t.cast(t.Callable, function)

                if "dialect" in func_builder.__code__.co_varnames:
                    func = func_builder(args, dialect=self.dialect)
                else:
                    func = func_builder(args)

                func = self.validate_expression(func, args)
                if self.dialect.PRESERVE_ORIGINAL_NAMES:
                    func.meta["name"] = this

                this = func
            else:
                if token_type == TokenType.IDENTIFIER:
                    this = exp.Identifier(this=this, quoted=True).update_positions(token)

                this = self.expression(exp.Anonymous, this=this, expressions=args)
                this = this.update_positions(token)

        if isinstance(this, exp.Expression):
            this.add_comments(comments)

        self._match_r_paren(this)
        return self._parse_window(this)

    def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression:
        return expression

    def _kv_to_prop_eq(
        self, expressions: t.List[exp.Expression], parse_map: bool = False
    ) -> t.List[exp.Expression]:
        transformed = []

        for index, e in enumerate(expressions):
            if isinstance(e, self.KEY_VALUE_DEFINITIONS):
                if isinstance(e, exp.Alias):
                    e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this)

                if not isinstance(e, exp.PropertyEQ):
                    e = self.expression(
                        exp.PropertyEQ,
                        this=e.this if parse_map else exp.to_identifier(e.this.name),
                        expression=e.expression,
                    )

                if isinstance(e.this, exp.Column):
                    e.this.replace(e.this.this)
            else:
                e = self._to_prop_eq(e, index)

            transformed.append(e)

        return transformed

    def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_statement()
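
    # UDF parameters are parsed as column defs, so signatures such as
    # f(a INT, b TEXT) reuse the column-definition machinery below.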
    def _parse_function_parameter(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(this=self._parse_id_var(), computed_column=False)

    def _parse_user_defined_function(
        self, kind: t.Optional[TokenType] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_table_parts(schema=True)

        if not self._match(TokenType.L_PAREN):
            return this

        expressions = self._parse_csv(self._parse_function_parameter)
        self._match_r_paren()
        return self.expression(
            exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True
        )

    def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier:
        literal = self._parse_primary()
        if literal:
            return self.expression(exp.Introducer, this=token.text, expression=literal)

        return self._identifier_expression(token)

    def _parse_session_parameter(self) -> exp.SessionParameter:
        kind = None
        this = self._parse_id_var() or self._parse_primary()

        if this and self._match(TokenType.DOT):
            kind = this.name
            this = self._parse_var() or self._parse_primary()

        return self.expression(exp.SessionParameter, this=this, kind=kind)

    def _parse_lambda_arg(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var()

    def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]:
        index = self._index

        if self._match(TokenType.L_PAREN):
            expressions = t.cast(
                t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg)
            )

            if not self._match(TokenType.R_PAREN):
                self._retreat(index)
        else:
            expressions = [self._parse_lambda_arg()]

        if self._match_set(self.LAMBDAS):
            return self.LAMBDAS[self._prev.token_type](self, expressions)

        self._retreat(index)

        this: t.Optional[exp.Expression]

        if self._match(TokenType.DISTINCT):
            this = self.expression(
                exp.Distinct, expressions=self._parse_csv(self._parse_assignment)
            )
        else:
            this = self._parse_select_or_expression(alias=alias)

        return self._parse_limit(
            self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this)))
        )
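
    # Parses a parenthesized list of column defs and/or constraints, e.g.
    # (id INT PRIMARY KEY, name TEXT), while backing off when the parenthesis
    # actually opens a subquery or CTE.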
    def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        index = self._index
        if not self._match(TokenType.L_PAREN):
            return this

        # Disambiguate between schema and subquery/CTE, e.g. in INSERT INTO table (<expr>),
        # expr can be of both types
        if self._match_set(self.SELECT_START_TOKENS):
            self._retreat(index)
            return this
        args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def())
        self._match_r_paren()
        return self.expression(exp.Schema, this=this, expressions=args)

    def _parse_field_def(self) -> t.Optional[exp.Expression]:
        return self._parse_column_def(self._parse_field(any_token=True))

    def _parse_column_def(
        self, this: t.Optional[exp.Expression], computed_column: bool = True
    ) -> t.Optional[exp.Expression]:
        # column defs are not really columns, they're identifiers
        if isinstance(this, exp.Column):
            this = this.this

        if not computed_column:
            self._match(TokenType.ALIAS)

        kind = self._parse_types(schema=True)

        if self._match_text_seq("FOR", "ORDINALITY"):
            return self.expression(exp.ColumnDef, this=this, ordinality=True)

        constraints: t.List[exp.Expression] = []

        if (not kind and self._match(TokenType.ALIAS)) or self._match_texts(
            ("ALIAS", "MATERIALIZED")
        ):
            persisted = self._prev.text.upper() == "MATERIALIZED"
            constraint_kind = exp.ComputedColumnConstraint(
                this=self._parse_assignment(),
                persisted=persisted or self._match_text_seq("PERSISTED"),
                not_null=self._match_pair(TokenType.NOT, TokenType.NULL),
            )
            constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind))
        elif (
            kind
            and self._match(TokenType.ALIAS, advance=False)
            and (
                not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
                or (self._next and self._next.token_type == TokenType.L_PAREN)
            )
        ):
            self._advance()
            constraints.append(
                self.expression(
                    exp.ColumnConstraint,
                    kind=exp.ComputedColumnConstraint(
                        this=self._parse_disjunction(),
                        persisted=self._match_texts(("STORED", "VIRTUAL"))
                        and self._prev.text.upper() == "STORED",
                    ),
                )
            )

        while True:
            constraint = self._parse_column_constraint()
            if not constraint:
                break
            constraints.append(constraint)

        if not kind and not constraints:
            return this

        return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints)
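
    # Handles both the wrapped AUTOINCREMENT(start, increment) form and the
    # keyword form START <n> INCREMENT <n> [ORDER | NOORDER] (e.g. Snowflake),
    # falling back to a bare AutoIncrementColumnConstraint when no bounds are given.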
    def _parse_auto_increment(
        self,
    ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint:
        start = None
        increment = None
        order = None

        if self._match(TokenType.L_PAREN, advance=False):
            args = self._parse_wrapped_csv(self._parse_bitwise)
            start = seq_get(args, 0)
            increment = seq_get(args, 1)
        elif self._match_text_seq("START"):
            start = self._parse_bitwise()
            self._match_text_seq("INCREMENT")
            increment = self._parse_bitwise()
            if self._match_text_seq("ORDER"):
                order = True
            elif self._match_text_seq("NOORDER"):
                order = False

        if start and increment:
            return exp.GeneratedAsIdentityColumnConstraint(
                start=start, increment=increment, this=False, order=order
            )

        return exp.AutoIncrementColumnConstraint()

    def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]:
        if not self._match_text_seq("REFRESH"):
            self._retreat(self._index - 1)
            return None
        return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True))

    def _parse_compress(self) -> exp.CompressColumnConstraint:
        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise)
            )

        return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise())

    def _parse_generated_as_identity(
        self,
    ) -> (
        exp.GeneratedAsIdentityColumnConstraint
        | exp.ComputedColumnConstraint
        | exp.GeneratedAsRowColumnConstraint
    ):
        if self._match_text_seq("BY", "DEFAULT"):
            on_null = self._match_pair(TokenType.ON, TokenType.NULL)
            this = self.expression(
                exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null
            )
        else:
            self._match_text_seq("ALWAYS")
            this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True)

        self._match(TokenType.ALIAS)

        if self._match_text_seq("ROW"):
            start = self._match_text_seq("START")
            if not start:
                self._match(TokenType.END)
            hidden = self._match_text_seq("HIDDEN")
            return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden)

        identity = self._match_text_seq("IDENTITY")

        if self._match(TokenType.L_PAREN):
            if self._match(TokenType.START_WITH):
                this.set("start", self._parse_bitwise())
            if self._match_text_seq("INCREMENT", "BY"):
                this.set("increment", self._parse_bitwise())
            if self._match_text_seq("MINVALUE"):
                this.set("minvalue", self._parse_bitwise())
            if self._match_text_seq("MAXVALUE"):
                this.set("maxvalue", self._parse_bitwise())

            if self._match_text_seq("CYCLE"):
                this.set("cycle", True)
            elif self._match_text_seq("NO", "CYCLE"):
                this.set("cycle", False)

            if not identity:
                this.set("expression", self._parse_range())
            elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False):
                args = self._parse_csv(self._parse_bitwise)
                this.set("start", seq_get(args, 0))
                this.set("increment", seq_get(args, 1))

            self._match_r_paren()

        return this

    def _parse_inline(self) -> exp.InlineLengthColumnConstraint:
        self._match_text_seq("LENGTH")
        return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise())

    def _parse_not_constraint(self) -> t.Optional[exp.Expression]:
        if self._match_text_seq("NULL"):
            return self.expression(exp.NotNullColumnConstraint)
        if self._match_text_seq("CASESPECIFIC"):
            return self.expression(exp.CaseSpecificColumnConstraint, not_=True)
        if self._match_text_seq("FOR", "REPLICATION"):
            return self.expression(exp.NotForReplicationColumnConstraint)

        # Unconsume the `NOT` token
        self._retreat(self._index - 1)
        return None

    def _parse_column_constraint(self) -> t.Optional[exp.Expression]:
        this = self._match(TokenType.CONSTRAINT) and self._parse_id_var()

        procedure_option_follows = (
            self._match(TokenType.WITH, advance=False)
            and self._next
            and self._next.text.upper() in self.PROCEDURE_OPTIONS
        )

        if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS):
            return self.expression(
                exp.ColumnConstraint,
                this=this,
                kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self),
            )

        return this
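
    # A leading CONSTRAINT <name> yields a named exp.Constraint wrapping the
    # unnamed constraints that follow; otherwise a single unnamed schema-level
    # constraint is parsed directly.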
    def _parse_constraint(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.CONSTRAINT):
            return self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS)

        return self.expression(
            exp.Constraint,
            this=self._parse_id_var(),
            expressions=self._parse_unnamed_constraints(),
        )

    def _parse_unnamed_constraints(self) -> t.List[exp.Expression]:
        constraints = []
        while True:
            constraint = self._parse_unnamed_constraint() or self._parse_function()
            if not constraint:
                break
            constraints.append(constraint)

        return constraints

    def _parse_unnamed_constraint(
        self, constraints: t.Optional[t.Collection[str]] = None
    ) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts(
            constraints or self.CONSTRAINT_PARSERS
        ):
            return None

        constraint = self._prev.text.upper()
        if constraint not in self.CONSTRAINT_PARSERS:
            self.raise_error(f"No parser found for schema constraint {constraint}.")

        return self.CONSTRAINT_PARSERS[constraint](self)

    def _parse_unique_key(self) -> t.Optional[exp.Expression]:
        return self._parse_id_var(any_token=False)

    def _parse_unique(self) -> exp.UniqueColumnConstraint:
        self._match_texts(("KEY", "INDEX"))
        return self.expression(
            exp.UniqueColumnConstraint,
            nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"),
            this=self._parse_schema(self._parse_unique_key()),
            index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text,
            on_conflict=self._parse_on_conflict(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_key_constraint_options(self) -> t.List[str]:
        options = []
        while True:
            if not self._curr:
                break

            if self._match(TokenType.ON):
                action = None
                on = self._advance_any() and self._prev.text

                if self._match_text_seq("NO", "ACTION"):
                    action = "NO ACTION"
                elif self._match_text_seq("CASCADE"):
                    action = "CASCADE"
                elif self._match_text_seq("RESTRICT"):
                    action = "RESTRICT"
                elif self._match_pair(TokenType.SET, TokenType.NULL):
                    action = "SET NULL"
                elif self._match_pair(TokenType.SET, TokenType.DEFAULT):
                    action = "SET DEFAULT"
                else:
                    self.raise_error("Invalid key constraint")

                options.append(f"ON {on} {action}")
            else:
                var = self._parse_var_from_options(
                    self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False
                )
                if not var:
                    break
                options.append(var.name)

        return options

    def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]:
        if match and not self._match(TokenType.REFERENCES):
            return None

        expressions = None
        this = self._parse_table(schema=True)
        options = self._parse_key_constraint_options()
        return self.expression(exp.Reference, this=this, expressions=expressions, options=options)
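
    # Parses FOREIGN KEY [(cols)] [REFERENCES table [(cols)]] along with any
    # ON DELETE / ON UPDATE referential actions, e.g. ON DELETE SET NULL.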
    def _parse_foreign_key(self) -> exp.ForeignKey:
        expressions = (
            self._parse_wrapped_id_vars()
            if not self._match(TokenType.REFERENCES, advance=False)
            else None
        )
        reference = self._parse_references()
        on_options = {}

        while self._match(TokenType.ON):
            if not self._match_set((TokenType.DELETE, TokenType.UPDATE)):
                self.raise_error("Expected DELETE or UPDATE")

            kind = self._prev.text.lower()

            if self._match_text_seq("NO", "ACTION"):
                action = "NO ACTION"
            elif self._match(TokenType.SET):
                self._match_set((TokenType.NULL, TokenType.DEFAULT))
                action = "SET " + self._prev.text.upper()
            else:
                self._advance()
                action = self._prev.text.upper()

            on_options[kind] = action

        return self.expression(
            exp.ForeignKey,
            expressions=expressions,
            reference=reference,
            options=self._parse_key_constraint_options(),
            **on_options,  # type: ignore
        )

    def _parse_primary_key_part(self) -> t.Optional[exp.Expression]:
        return self._parse_ordered() or self._parse_field()

    def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]:
        if not self._match(TokenType.TIMESTAMP_SNAPSHOT):
            self._retreat(self._index - 1)
            return None

        id_vars = self._parse_wrapped_id_vars()
        return self.expression(
            exp.PeriodForSystemTimeConstraint,
            this=seq_get(id_vars, 0),
            expression=seq_get(id_vars, 1),
        )

    def _parse_primary_key(
        self, wrapped_optional: bool = False, in_props: bool = False
    ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey:
        desc = (
            self._match_set((TokenType.ASC, TokenType.DESC))
            and self._prev.token_type == TokenType.DESC
        )

        if not in_props and not self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.PrimaryKeyColumnConstraint,
                desc=desc,
                options=self._parse_key_constraint_options(),
            )

        expressions = self._parse_wrapped_csv(
            self._parse_primary_key_part, optional=wrapped_optional
        )

        return self.expression(
            exp.PrimaryKey,
            expressions=expressions,
            include=self._parse_index_params(),
            options=self._parse_key_constraint_options(),
        )

    def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True))

    def _parse_odbc_datetime_literal(self) -> exp.Expression:
        """
        Parses a datetime column in ODBC format. We parse the column into the corresponding
        types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the
        same as we did for `DATE('yyyy-mm-dd')`.

        Reference:
            https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals
        """
        self._match(TokenType.VAR)
        exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()]
        expression = self.expression(exp_class=exp_class, this=self._parse_string())
        if not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")
        return expression

    def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)):
            return this

        if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS:
            map_token = seq_get(self._tokens, self._index - 2)
            parse_map = map_token is not None and map_token.text.upper() == "MAP"
        else:
            parse_map = False

        bracket_kind = self._prev.token_type
        if (
            bracket_kind == TokenType.L_BRACE
            and self._curr
            and self._curr.token_type == TokenType.VAR
            and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS
        ):
            return self._parse_odbc_datetime_literal()

        expressions = self._parse_csv(
            lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE)
        )

        if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET):
            self.raise_error("Expected ]")
        elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE):
            self.raise_error("Expected }")

        # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs
        if bracket_kind == TokenType.L_BRACE:
            this = self.expression(
                exp.Struct,
                expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map),
            )
        elif not this:
            this = build_array_constructor(
                exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect
            )
        else:
            constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper())
            if constructor_type:
                return build_array_constructor(
                    constructor_type,
                    args=expressions,
                    bracket_kind=bracket_kind,
                    dialect=self.dialect,
                )

            expressions = apply_index_offset(
                this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect
            )
            this = self.expression(
                exp.Bracket,
                this=this,
                expressions=expressions,
                comments=this.pop_comments(),
            )

        self._add_comments(this)
        return self._parse_bracket(this)

    def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.COLON):
            return self.expression(exp.Slice, this=this, expression=self._parse_assignment())
        return this

    def _parse_case(self) -> t.Optional[exp.Expression]:
        ifs = []
        default = None

        comments = self._prev_comments
        expression = self._parse_assignment()

        while self._match(TokenType.WHEN):
            this = self._parse_assignment()
            self._match(TokenType.THEN)
            then = self._parse_assignment()
            ifs.append(self.expression(exp.If, this=this, true=then))

        if self._match(TokenType.ELSE):
            default = self._parse_assignment()

        if not self._match(TokenType.END):
            if isinstance(default, exp.Interval) and default.this.sql().upper() == "END":
                default = exp.column("interval")
            else:
                self.raise_error("Expected END after CASE", self._prev)

        return self.expression(
            exp.Case, comments=comments, this=expression, ifs=ifs, default=default
        )
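
    # IF comes in two shapes, both normalized into exp.If: the function form
    # IF(<cond>, <true>[, <false>]) and the statement form
    # IF <cond> THEN <true> [ELSE <false>] END.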
    def _parse_if(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            args = self._parse_csv(
                lambda: self._parse_alias(self._parse_assignment(), explicit=True)
            )
            this = self.validate_expression(exp.If.from_arg_list(args), args)
            self._match_r_paren()
        else:
            index = self._index - 1

            if self.NO_PAREN_IF_COMMANDS and index == 0:
                return self._parse_as_command(self._prev)

            condition = self._parse_assignment()

            if not condition:
                self._retreat(index)
                return None

            self._match(TokenType.THEN)
            true = self._parse_assignment()
            false = self._parse_assignment() if self._match(TokenType.ELSE) else None
            self._match(TokenType.END)
            this = self.expression(exp.If, this=condition, true=true, false=false)

        return this

    def _parse_next_value_for(self) -> t.Optional[exp.Expression]:
        if not self._match_text_seq("VALUE", "FOR"):
            self._retreat(self._index - 1)
            return None

        return self.expression(
            exp.NextValueFor,
            this=self._parse_column(),
            order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order),
        )

    def _parse_extract(self) -> exp.Extract | exp.Anonymous:
        this = self._parse_function() or self._parse_var_or_string(upper=True)

        if self._match(TokenType.FROM):
            return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

        if not self._match(TokenType.COMMA):
            self.raise_error("Expected FROM or comma after EXTRACT", self._prev)

        return self.expression(exp.Extract, this=this, expression=self._parse_bitwise())

    def _parse_gap_fill(self) -> exp.GapFill:
        self._match(TokenType.TABLE)
        this = self._parse_table()

        self._match(TokenType.COMMA)
        args = [this, *self._parse_csv(self._parse_lambda)]

        gap_fill = exp.GapFill.from_arg_list(args)
        return self.validate_expression(gap_fill, args)

    def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression:
        this = self._parse_assignment()

        if not self._match(TokenType.ALIAS):
            if self._match(TokenType.COMMA):
                return self.expression(exp.CastToStrType, this=this, to=self._parse_string())

            self.raise_error("Expected AS after CAST")

        fmt = None
        to = self._parse_types()

        default = self._match(TokenType.DEFAULT)
        if default:
            default = self._parse_bitwise()
            self._match_text_seq("ON", "CONVERSION", "ERROR")

        if self._match_set((TokenType.FORMAT, TokenType.COMMA)):
            fmt_string = self._parse_string()
            fmt = self._parse_at_time_zone(fmt_string)

            if not to:
                to = exp.DataType.build(exp.DataType.Type.UNKNOWN)
            if to.this in exp.DataType.TEMPORAL_TYPES:
                this = self.expression(
                    exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime,
                    this=this,
                    format=exp.Literal.string(
                        format_time(
                            fmt_string.this if fmt_string else "",
                            self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING,
                            self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE,
                        )
                    ),
                    safe=safe,
                )

                if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime):
                    this.set("zone", fmt.args["zone"])
                return this
        elif not to:
            self.raise_error("Expected TYPE after CAST")
        elif isinstance(to, exp.Identifier):
            to = exp.DataType.build(to.name, dialect=self.dialect, udt=True)
        elif to.this == exp.DataType.Type.CHAR:
            if self._match(TokenType.CHARACTER_SET):
                to = self.expression(exp.CharacterSet, this=self._parse_var_or_string())

        return self.build_cast(
            strict=strict,
            this=this,
            to=to,
            format=fmt,
            safe=safe,
            action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False),
            default=default,
        )
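
    # Normalizes the various string-aggregation spellings, e.g. Postgres/BigQuery
    # STRING_AGG, Trino LISTAGG(... ON OVERFLOW ...) and MySQL GROUP_CONCAT, into
    # exp.GroupConcat, folding an optional WITHIN GROUP (ORDER BY ...) clause into
    # the aggregated expression so it transpiles cleanly to dialects without it.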
    def _parse_string_agg(self) -> exp.GroupConcat:
        if self._match(TokenType.DISTINCT):
            args: t.List[t.Optional[exp.Expression]] = [
                self.expression(exp.Distinct, expressions=[self._parse_assignment()])
            ]
            if self._match(TokenType.COMMA):
                args.extend(self._parse_csv(self._parse_assignment))
        else:
            args = self._parse_csv(self._parse_assignment)  # type: ignore

        if self._match_text_seq("ON", "OVERFLOW"):
            # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior])
            if self._match_text_seq("ERROR"):
                on_overflow: t.Optional[exp.Expression] = exp.var("ERROR")
            else:
                self._match_text_seq("TRUNCATE")
                on_overflow = self.expression(
                    exp.OverflowTruncateBehavior,
                    this=self._parse_string(),
                    with_count=(
                        self._match_text_seq("WITH", "COUNT")
                        or not self._match_text_seq("WITHOUT", "COUNT")
                    ),
                )
        else:
            on_overflow = None

        index = self._index
        if not self._match(TokenType.R_PAREN) and args:
            # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]])
            # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n])
            # The order is parsed through `this` as a canonicalization for WITHIN GROUPs
            args[0] = self._parse_limit(this=self._parse_order(this=args[0]))
            return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1))

        # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]).
        # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that
        # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them.
        if not self._match_text_seq("WITHIN", "GROUP"):
            self._retreat(index)
            return self.validate_expression(exp.GroupConcat.from_arg_list(args), args)

        # The corresponding match_r_paren will be called in parse_function (caller)
        self._match_l_paren()

        return self.expression(
            exp.GroupConcat,
            this=self._parse_order(this=seq_get(args, 0)),
            separator=seq_get(args, 1),
            on_overflow=on_overflow,
        )

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)
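
    # JSON ON-condition clauses (<value> ON EMPTY / ON ERROR / ON NULL) are
    # order-sensitive across dialects, so the dialect flag below decides whether
    # EMPTY or ERROR is attempted first.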
    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
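
    # Parses JSON_TABLE(<json> [, <path>] ... COLUMNS (...)), including its
    # ON ERROR / ON EMPTY handling; the COLUMNS schema comes from
    # _parse_json_schema above.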

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
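
    # Editor's sketch (illustrative, not part of the original source): both SUBSTRING
    # spellings accepted above normalize to the same exp.Substring node, which can be
    # verified through the public API.
    @staticmethod
    def _example_substring_forms() -> None:
        import sqlglot

        for sql in ("SELECT SUBSTRING('abc' FROM 2 FOR 1)", "SELECT SUBSTRING('abc', 2, 1)"):
            # Both produce Substring(this='abc', start=2, length=1)
            assert sqlglot.parse_one(sql, read="postgres").find(exp.Substring)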

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The code below handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
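        #   e.g. MAX(x) KEEP (DENSE_RANK FIRST ORDER BY y) OVER (PARTITION BY z)
        # (editor's example; the recursive call below wraps the window that was just built)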
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version first and, if that fails, treats
        # the token as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
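
    # Editor's sketch (illustrative, not part of the original source): _parse_tokens is
    # the generic left-associative binary-operator combinator. A hypothetical rule for a
    # tiny `a AND b AND c` grammar could be written as
    #
    #   def _parse_conjunction_example(self):
    #       return self._parse_tokens(self._parse_equality, {TokenType.AND: exp.And})
    #
    # folding matches left-to-right into nested exp.And nodes and attaching any comments
    # that preceded each operator token.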

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
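
    # Editor's sketch (illustrative, not part of the original source): SET DATA and TYPE
    # are each matched optionally above, so Postgres' ALTER COLUMN c TYPE INT and the
    # standard SET DATA TYPE spelling both reach the same dtype branch.
    @staticmethod
    def _example_alter_column_type() -> None:
        import sqlglot

        node = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
        assert node.find(exp.AlterColumn) is not None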

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
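
    # Editor's note (illustrative): when an ALTER statement contains trailing tokens the
    # parser doesn't understand, _parse_alter falls back to _parse_as_command, which
    # swallows the rest of the statement into a bare exp.Command so unsupported-but-valid
    # SQL still round-trips verbatim instead of raising.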

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
            expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
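
    # Editor's sketch (illustrative, not part of the original source): a minimal MERGE
    # that exercises _parse_merge above and _parse_when_matched below.
    @staticmethod
    def _example_merge() -> None:
        import sqlglot

        node = sqlglot.parse_one(
            "MERGE INTO t USING s ON t.id = s.id "
            "WHEN MATCHED THEN UPDATE SET t.v = s.v "
            "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)"
        )
        assert isinstance(node, exp.Merge)
        assert len(node.args["whens"].expressions) == 2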

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None
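
    # Editor's example (illustrative): the tag machinery above recognizes dollar-quoted
    # strings when the tokenizer hasn't already produced a HEREDOC_STRING token, e.g.
    #
    #   $$ body $$        -- tags == ["$", "$"], no tag text
    #   $fn$ body $fn$    -- tags == ["$", "FN", "$"], tag text "FN"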

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True
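
    # Editor's note (illustrative): the _match* helpers above are all speculative: they
    # return None rather than raising when the lookahead fails, and _match_text_seq
    # retreats to its starting index on a partial match. Callers can therefore probe
    # multi-keyword sequences and fall through to other alternatives, e.g.
    #
    #   if self._match_text_seq("WITH", "GRANT", "OPTION"):  # consumes all three tokens or none
    #       ...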

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
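
    # Editor's example (illustrative, Snowflake-flavored SQL): the COPY helpers above and
    # _parse_copy below cover statements like
    #
    #   COPY INTO t FROM 's3://bucket/path'
    #       STORAGE_INTEGRATION = my_integration
    #       FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1)
    #
    # where the parenthesized FILE_FORMAT options go through _parse_wrapped_options.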

    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until a comma (end of this privilege), ON
        # (end of the privilege list) or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
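
    # Editor's sketch (illustrative, not part of the original source): privileges are
    # collected token-by-token above, so multi-word privilege names survive verbatim.
    @staticmethod
    def _example_grant() -> None:
        import sqlglot

        node = sqlglot.parse_one("GRANT SELECT, INSERT ON TABLE t TO ROLE analyst")
        assert isinstance(node, exp.Grant)
        assert node.find(exp.GrantPrivilege) is not None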

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression
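
    # Editor's note (illustrative): the _parse_pipe_syntax_* family below implements
    # pipe syntax (FROM t |> WHERE ... |> SELECT ...). _build_pipe_cte folds the query
    # built so far into a CTE named __tmp<N> that the next stage selects from, so
    #
    #   FROM t |> WHERE x > 1 |> SELECT y
    #
    # conceptually becomes WITH __tmp1 AS (SELECT * FROM t WHERE x > 1) SELECT y FROM __tmp1.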
query.args.get("with") 8570 ctes = with_.pop() if with_ else None 8571 8572 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8573 if ctes: 8574 new_select.set("with", ctes) 8575 8576 return new_select.with_(new_cte, as_=query, copy=False) 8577 8578 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8579 select = self._parse_select(consume_pipe=False) 8580 if not select: 8581 return query 8582 8583 return self._build_pipe_cte( 8584 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8585 ) 8586 8587 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8588 limit = self._parse_limit() 8589 offset = self._parse_offset() 8590 if limit: 8591 curr_limit = query.args.get("limit", limit) 8592 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8593 query.limit(limit, copy=False) 8594 if offset: 8595 curr_offset = query.args.get("offset") 8596 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8597 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8598 8599 return query 8600 8601 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8602 this = self._parse_assignment() 8603 if self._match_text_seq("GROUP", "AND", advance=False): 8604 return this 8605 8606 this = self._parse_alias(this) 8607 8608 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8609 return self._parse_ordered(lambda: this) 8610 8611 return this 8612 8613 def _parse_pipe_syntax_aggregate_group_order_by( 8614 self, query: exp.Select, group_by_exists: bool = True 8615 ) -> exp.Select: 8616 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8617 aggregates_or_groups, orders = [], [] 8618 for element in expr: 8619 if isinstance(element, exp.Ordered): 8620 this = element.this 8621 if isinstance(this, exp.Alias): 8622 element.set("this", this.args["alias"]) 8623 orders.append(element) 8624 else: 8625 this = element 8626 aggregates_or_groups.append(this) 8627 8628 if group_by_exists: 8629 query.select(*aggregates_or_groups, copy=False).group_by( 8630 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8631 copy=False, 8632 ) 8633 else: 8634 query.select(*aggregates_or_groups, append=False, copy=False) 8635 8636 if orders: 8637 return query.order_by(*orders, append=False, copy=False) 8638 8639 return query 8640 8641 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8642 self._match_text_seq("AGGREGATE") 8643 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8644 8645 if self._match(TokenType.GROUP_BY) or ( 8646 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8647 ): 8648 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8649 8650 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8651 8652 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8653 first_setop = self.parse_set_operation(this=query) 8654 if not first_setop: 8655 return None 8656 8657 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8658 expr = self._parse_paren() 8659 return expr.assert_is(exp.Subquery).unnest() if expr else None 8660 8661 first_setop.this.pop() 8662 8663 setops = [ 8664 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8665 *self._parse_csv(_parse_and_unwrap_query), 8666 ] 8667 8668 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8669 
with_ = query.args.get("with") 8670 ctes = with_.pop() if with_ else None 8671 8672 if isinstance(first_setop, exp.Union): 8673 query = query.union(*setops, copy=False, **first_setop.args) 8674 elif isinstance(first_setop, exp.Except): 8675 query = query.except_(*setops, copy=False, **first_setop.args) 8676 else: 8677 query = query.intersect(*setops, copy=False, **first_setop.args) 8678 8679 query.set("with", ctes) 8680 8681 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8682 8683 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8684 join = self._parse_join() 8685 if not join: 8686 return None 8687 8688 if isinstance(query, exp.Select): 8689 return query.join(join, copy=False) 8690 8691 return query 8692 8693 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8694 pivots = self._parse_pivots() 8695 if not pivots: 8696 return query 8697 8698 from_ = query.args.get("from") 8699 if from_: 8700 from_.this.set("pivots", pivots) 8701 8702 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8703 8704 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8705 self._match_text_seq("EXTEND") 8706 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8707 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8708 8709 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8710 sample = self._parse_table_sample() 8711 8712 with_ = query.args.get("with") 8713 if with_: 8714 with_.expressions[-1].this.set("sample", sample) 8715 else: 8716 query.set("sample", sample) 8717 8718 return query 8719 8720 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8721 if isinstance(query, exp.Subquery): 8722 query = exp.select("*").from_(query, copy=False) 8723 8724 if not query.args.get("from"): 8725 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8726 8727 while self._match(TokenType.PIPE_GT): 8728 start = self._curr 8729 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8730 if not parser: 8731 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8732 # keywords, making it tricky to disambiguate them without lookahead. The approach 8733 # here is to try and parse a set operation and if that fails, then try to parse a 8734 # join operator. If that fails as well, then the operator is not supported. 
8735 parsed_query = self._parse_pipe_syntax_set_operator(query) 8736 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8737 if not parsed_query: 8738 self._retreat(start) 8739 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8740 break 8741 query = parsed_query 8742 else: 8743 query = parser(self, query) 8744 8745 return query 8746 8747 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8748 vars = self._parse_csv(self._parse_id_var) 8749 if not vars: 8750 return None 8751 8752 return self.expression( 8753 exp.DeclareItem, 8754 this=vars, 8755 kind=self._parse_types(), 8756 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8757 ) 8758 8759 def _parse_declare(self) -> exp.Declare | exp.Command: 8760 start = self._prev 8761 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8762 8763 if not expressions or self._curr: 8764 return self._parse_as_command(start) 8765 8766 return self.expression(exp.Declare, expressions=expressions) 8767 8768 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8769 exp_class = exp.Cast if strict else exp.TryCast 8770 8771 if exp_class == exp.TryCast: 8772 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8773 8774 return self.expression(exp_class, **kwargs) 8775 8776 def _parse_json_value(self) -> exp.JSONValue: 8777 this = self._parse_bitwise() 8778 self._match(TokenType.COMMA) 8779 path = self._parse_bitwise() 8780 8781 returning = self._match(TokenType.RETURNING) and self._parse_type() 8782 8783 return self.expression( 8784 exp.JSONValue, 8785 this=this, 8786 path=self.dialect.to_json_path(path), 8787 returning=returning, 8788 on_condition=self._parse_on_condition(), 8789 ) 8790 8791 def _parse_group_concat(self) -> t.Optional[exp.Expression]: 8792 def concat_exprs( 8793 node: t.Optional[exp.Expression], exprs: t.List[exp.Expression] 8794 ) -> exp.Expression: 8795 if isinstance(node, exp.Distinct) and len(node.expressions) > 1: 8796 concat_exprs = [ 8797 self.expression(exp.Concat, expressions=node.expressions, safe=True) 8798 ] 8799 node.set("expressions", concat_exprs) 8800 return node 8801 if len(exprs) == 1: 8802 return exprs[0] 8803 return self.expression(exp.Concat, expressions=args, safe=True) 8804 8805 args = self._parse_csv(self._parse_lambda) 8806 8807 if args: 8808 order = args[-1] if isinstance(args[-1], exp.Order) else None 8809 8810 if order: 8811 # Order By is the last (or only) expression in the list and has consumed the 'expr' before it, 8812 # remove 'expr' from exp.Order and add it back to args 8813 args[-1] = order.this 8814 order.set("this", concat_exprs(order.this, args)) 8815 8816 this = order or concat_exprs(args[0], args) 8817 else: 8818 this = None 8819 8820 separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None 8821 8822 return self.expression(exp.GroupConcat, this=this, separator=separator)
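A quick illustration of the GROUP_CONCAT handling above (an aside, not part of the module source): DISTINCT, a trailing ORDER BY and a SEPARATOR are all folded into a single exp.GroupConcat node by _parse_group_concat.

import sqlglot
from sqlglot import exp

# MySQL registers GROUP_CONCAT as a function parser that routes here.
tree = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR ', ') FROM t",
    read="mysql",
)
node = tree.selects[0]
assert isinstance(node, exp.GroupConcat)
assert node.args.get("separator") is not None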
32def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap: 33 if len(args) == 1 and args[0].is_star: 34 return exp.StarMap(this=args[0]) 35 36 keys = [] 37 values = [] 38 for i in range(0, len(args), 2): 39 keys.append(args[i]) 40 values.append(args[i + 1]) 41 42 return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))
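As a usage sketch for the builder above (calling it directly, outside the parser): alternating key/value arguments collapse into two parallel arrays, while a single star argument short-circuits to exp.StarMap.

from sqlglot import exp
from sqlglot.parser import build_var_map

node = build_var_map(
    [
        exp.Literal.string("a"), exp.Literal.number(1),
        exp.Literal.string("b"), exp.Literal.number(2),
    ]
)
assert isinstance(node, exp.VarMap)
assert len(node.args["keys"].expressions) == 2  # keys "a" and "b"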
50def binary_range_parser( 51 expr_type: t.Type[exp.Expression], reverse_args: bool = False 52) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]: 53 def _parse_binary_range( 54 self: Parser, this: t.Optional[exp.Expression] 55 ) -> t.Optional[exp.Expression]: 56 expression = self._parse_bitwise() 57 if reverse_args: 58 this, expression = expression, this 59 return self._parse_escape(self.expression(expr_type, this=this, expression=expression)) 60 61 return _parse_binary_range
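A minimal sketch of how this factory is consumed: RANGE_PARSERS (further down) wires it to tokens such as GLOB, so an infix comparison parses into whichever expression type the factory was given.

import sqlglot
from sqlglot import exp

# TokenType.GLOB maps to binary_range_parser(exp.Glob) in RANGE_PARSERS.
tree = sqlglot.parse_one("SELECT x GLOB '*.py' FROM t", read="sqlite")
assert isinstance(tree.selects[0], exp.Glob)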
64def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func: 65 # Default argument order is base, expression 66 this = seq_get(args, 0) 67 expression = seq_get(args, 1) 68 69 if expression: 70 if not dialect.LOG_BASE_FIRST: 71 this, expression = expression, this 72 return exp.Log(this=this, expression=expression) 73 74 return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)
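A sketch of the two call shapes, invoking the builder directly (the dialect choice is illustrative): two arguments produce an exp.Log whose operand order follows LOG_BASE_FIRST, and a single argument yields exp.Ln when the dialect's parser sets LOG_DEFAULTS_TO_LN.

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_logarithm

dialect = Dialect.get_or_raise("mysql")
two_args = build_logarithm([exp.Literal.number(2), exp.Literal.number(32)], dialect)
one_arg = build_logarithm([exp.Literal.number(100)], dialect)
assert isinstance(two_args, exp.Log)
assert isinstance(one_arg, (exp.Ln, exp.Log))  # which one depends on the dialect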
94def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]: 95 def _builder(args: t.List, dialect: Dialect) -> E: 96 expression = expr_type( 97 this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1)) 98 ) 99 if len(args) > 2 and expr_type is exp.JSONExtract: 100 expression.set("expressions", args[2:]) 101 102 return expression 103 104 return _builder
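For example, JSON_EXTRACT is registered through this factory, so the raw path string is normalized into a structured path node via dialect.to_json_path:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a.b') FROM t", read="mysql")
node = tree.selects[0]
assert isinstance(node, exp.JSONExtract)
print(repr(node.expression))  # the normalized JSON path produced by to_json_path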
107def build_mod(args: t.List) -> exp.Mod: 108 this = seq_get(args, 0) 109 expression = seq_get(args, 1) 110 111 # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7 112 this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this 113 expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression 114 115 return exp.Mod(this=this, expression=expression)
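The parenthesization matters once exp.Mod is rendered back as the infix % operator; a sketch (the output comment shows the expected shape, not a guaranteed byte-for-byte rendering):

import sqlglot

# MOD(a + 1, 7) keeps the addition wrapped so operator precedence survives,
# i.e. roughly: SELECT (a + 1) % 7
print(sqlglot.transpile("SELECT MOD(a + 1, 7)", read="mysql", write="duckdb")[0])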
127def build_array_constructor( 128 exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect 129) -> exp.Expression: 130 array_exp = exp_class(expressions=args) 131 132 if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS: 133 array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET) 134 135 return array_exp
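A direct-call sketch with illustrative inputs: the bracket_notation flag is only recorded for dialects that distinguish ARRAY[...] from array(...) constructors (per HAS_DISTINCT_ARRAY_CONSTRUCTORS).

from sqlglot import exp
from sqlglot.dialects import Dialect
from sqlglot.parser import build_array_constructor
from sqlglot.tokens import TokenType

node = build_array_constructor(
    exp.Array,
    [exp.Literal.number(1), exp.Literal.number(2)],
    bracket_kind=TokenType.L_BRACKET,
    dialect=Dialect.get_or_raise("duckdb"),
)
assert isinstance(node, exp.Array)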
138def build_convert_timezone( 139 args: t.List, default_source_tz: t.Optional[str] = None 140) -> t.Union[exp.ConvertTimezone, exp.Anonymous]: 141 if len(args) == 2: 142 source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None 143 return exp.ConvertTimezone( 144 source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1) 145 ) 146 147 return exp.ConvertTimezone.from_arg_list(args)
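With only two arguments, the timestamp is assumed to already be in default_source_tz when one is supplied; a sketch using illustrative zone names and an illustrative column:

from sqlglot import exp
from sqlglot.parser import build_convert_timezone

node = build_convert_timezone(
    [exp.Literal.string("UTC"), exp.column("created_at")],
    default_source_tz="America/Los_Angeles",
)
assert isinstance(node, exp.ConvertTimezone)
assert node.args["source_tz"].name == "America/Los_Angeles"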
182class Parser(metaclass=_Parser): 183 """ 184 Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree. 185 186 Args: 187 error_level: The desired error level. 188 Default: ErrorLevel.IMMEDIATE 189 error_message_context: The amount of context to capture from a query string when displaying 190 the error message (in number of characters). 191 Default: 100 192 max_errors: Maximum number of error messages to include in a raised ParseError. 193 This is only relevant if error_level is ErrorLevel.RAISE. 194 Default: 3 195 """ 196 197 FUNCTIONS: t.Dict[str, t.Callable] = { 198 **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()}, 199 **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce), 200 "ARRAY": lambda args, dialect: exp.Array(expressions=args), 201 "ARRAYAGG": lambda args, dialect: exp.ArrayAgg( 202 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 203 ), 204 "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg( 205 this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None 206 ), 207 "CHAR": lambda args: exp.Chr(expressions=args), 208 "CHR": lambda args: exp.Chr(expressions=args), 209 "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True), 210 "CONCAT": lambda args, dialect: exp.Concat( 211 expressions=args, 212 safe=not dialect.STRICT_STRING_CONCAT, 213 coalesce=dialect.CONCAT_COALESCE, 214 ), 215 "CONCAT_WS": lambda args, dialect: exp.ConcatWs( 216 expressions=args, 217 safe=not dialect.STRICT_STRING_CONCAT, 218 coalesce=dialect.CONCAT_COALESCE, 219 ), 220 "CONVERT_TIMEZONE": build_convert_timezone, 221 "DATE_TO_DATE_STR": lambda args: exp.Cast( 222 this=seq_get(args, 0), 223 to=exp.DataType(this=exp.DataType.Type.TEXT), 224 ), 225 "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray( 226 start=seq_get(args, 0), 227 end=seq_get(args, 1), 228 step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")), 229 ), 230 "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)), 231 "HEX": build_hex, 232 "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract), 233 "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar), 234 "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar), 235 "LIKE": build_like, 236 "LOG": build_logarithm, 237 "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)), 238 "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)), 239 "LOWER": build_lower, 240 "LPAD": lambda args: build_pad(args), 241 "LEFTPAD": lambda args: build_pad(args), 242 "LTRIM": lambda args: build_trim(args), 243 "MOD": build_mod, 244 "RIGHTPAD": lambda args: build_pad(args, is_left=False), 245 "RPAD": lambda args: build_pad(args, is_left=False), 246 "RTRIM": lambda args: build_trim(args, is_left=False), 247 "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0)) 248 if len(args) != 2 249 else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)), 250 "STRPOS": exp.StrPosition.from_arg_list, 251 "CHARINDEX": lambda args: build_locate_strposition(args), 252 "INSTR": exp.StrPosition.from_arg_list, 253 "LOCATE": lambda args: build_locate_strposition(args), 254 "TIME_TO_TIME_STR": lambda args: exp.Cast( 255 this=seq_get(args, 0), 256 to=exp.DataType(this=exp.DataType.Type.TEXT), 257 ), 258 "TO_HEX": build_hex, 259 "TS_OR_DS_TO_DATE_STR": 
lambda args: exp.Substring( 260 this=exp.Cast( 261 this=seq_get(args, 0), 262 to=exp.DataType(this=exp.DataType.Type.TEXT), 263 ), 264 start=exp.Literal.number(1), 265 length=exp.Literal.number(10), 266 ), 267 "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))), 268 "UPPER": build_upper, 269 "VAR_MAP": build_var_map, 270 } 271 272 NO_PAREN_FUNCTIONS = { 273 TokenType.CURRENT_DATE: exp.CurrentDate, 274 TokenType.CURRENT_DATETIME: exp.CurrentDate, 275 TokenType.CURRENT_TIME: exp.CurrentTime, 276 TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp, 277 TokenType.CURRENT_USER: exp.CurrentUser, 278 } 279 280 STRUCT_TYPE_TOKENS = { 281 TokenType.NESTED, 282 TokenType.OBJECT, 283 TokenType.STRUCT, 284 TokenType.UNION, 285 } 286 287 NESTED_TYPE_TOKENS = { 288 TokenType.ARRAY, 289 TokenType.LIST, 290 TokenType.LOWCARDINALITY, 291 TokenType.MAP, 292 TokenType.NULLABLE, 293 TokenType.RANGE, 294 *STRUCT_TYPE_TOKENS, 295 } 296 297 ENUM_TYPE_TOKENS = { 298 TokenType.DYNAMIC, 299 TokenType.ENUM, 300 TokenType.ENUM8, 301 TokenType.ENUM16, 302 } 303 304 AGGREGATE_TYPE_TOKENS = { 305 TokenType.AGGREGATEFUNCTION, 306 TokenType.SIMPLEAGGREGATEFUNCTION, 307 } 308 309 TYPE_TOKENS = { 310 TokenType.BIT, 311 TokenType.BOOLEAN, 312 TokenType.TINYINT, 313 TokenType.UTINYINT, 314 TokenType.SMALLINT, 315 TokenType.USMALLINT, 316 TokenType.INT, 317 TokenType.UINT, 318 TokenType.BIGINT, 319 TokenType.UBIGINT, 320 TokenType.INT128, 321 TokenType.UINT128, 322 TokenType.INT256, 323 TokenType.UINT256, 324 TokenType.MEDIUMINT, 325 TokenType.UMEDIUMINT, 326 TokenType.FIXEDSTRING, 327 TokenType.FLOAT, 328 TokenType.DOUBLE, 329 TokenType.UDOUBLE, 330 TokenType.CHAR, 331 TokenType.NCHAR, 332 TokenType.VARCHAR, 333 TokenType.NVARCHAR, 334 TokenType.BPCHAR, 335 TokenType.TEXT, 336 TokenType.MEDIUMTEXT, 337 TokenType.LONGTEXT, 338 TokenType.BLOB, 339 TokenType.MEDIUMBLOB, 340 TokenType.LONGBLOB, 341 TokenType.BINARY, 342 TokenType.VARBINARY, 343 TokenType.JSON, 344 TokenType.JSONB, 345 TokenType.INTERVAL, 346 TokenType.TINYBLOB, 347 TokenType.TINYTEXT, 348 TokenType.TIME, 349 TokenType.TIMETZ, 350 TokenType.TIMESTAMP, 351 TokenType.TIMESTAMP_S, 352 TokenType.TIMESTAMP_MS, 353 TokenType.TIMESTAMP_NS, 354 TokenType.TIMESTAMPTZ, 355 TokenType.TIMESTAMPLTZ, 356 TokenType.TIMESTAMPNTZ, 357 TokenType.DATETIME, 358 TokenType.DATETIME2, 359 TokenType.DATETIME64, 360 TokenType.SMALLDATETIME, 361 TokenType.DATE, 362 TokenType.DATE32, 363 TokenType.INT4RANGE, 364 TokenType.INT4MULTIRANGE, 365 TokenType.INT8RANGE, 366 TokenType.INT8MULTIRANGE, 367 TokenType.NUMRANGE, 368 TokenType.NUMMULTIRANGE, 369 TokenType.TSRANGE, 370 TokenType.TSMULTIRANGE, 371 TokenType.TSTZRANGE, 372 TokenType.TSTZMULTIRANGE, 373 TokenType.DATERANGE, 374 TokenType.DATEMULTIRANGE, 375 TokenType.DECIMAL, 376 TokenType.DECIMAL32, 377 TokenType.DECIMAL64, 378 TokenType.DECIMAL128, 379 TokenType.DECIMAL256, 380 TokenType.UDECIMAL, 381 TokenType.BIGDECIMAL, 382 TokenType.UUID, 383 TokenType.GEOGRAPHY, 384 TokenType.GEOGRAPHYPOINT, 385 TokenType.GEOMETRY, 386 TokenType.POINT, 387 TokenType.RING, 388 TokenType.LINESTRING, 389 TokenType.MULTILINESTRING, 390 TokenType.POLYGON, 391 TokenType.MULTIPOLYGON, 392 TokenType.HLLSKETCH, 393 TokenType.HSTORE, 394 TokenType.PSEUDO_TYPE, 395 TokenType.SUPER, 396 TokenType.SERIAL, 397 TokenType.SMALLSERIAL, 398 TokenType.BIGSERIAL, 399 TokenType.XML, 400 TokenType.YEAR, 401 TokenType.USERDEFINED, 402 TokenType.MONEY, 403 TokenType.SMALLMONEY, 404 TokenType.ROWVERSION, 405 TokenType.IMAGE, 406 TokenType.VARIANT, 407 
TokenType.VECTOR, 408 TokenType.VOID, 409 TokenType.OBJECT, 410 TokenType.OBJECT_IDENTIFIER, 411 TokenType.INET, 412 TokenType.IPADDRESS, 413 TokenType.IPPREFIX, 414 TokenType.IPV4, 415 TokenType.IPV6, 416 TokenType.UNKNOWN, 417 TokenType.NOTHING, 418 TokenType.NULL, 419 TokenType.NAME, 420 TokenType.TDIGEST, 421 TokenType.DYNAMIC, 422 *ENUM_TYPE_TOKENS, 423 *NESTED_TYPE_TOKENS, 424 *AGGREGATE_TYPE_TOKENS, 425 } 426 427 SIGNED_TO_UNSIGNED_TYPE_TOKEN = { 428 TokenType.BIGINT: TokenType.UBIGINT, 429 TokenType.INT: TokenType.UINT, 430 TokenType.MEDIUMINT: TokenType.UMEDIUMINT, 431 TokenType.SMALLINT: TokenType.USMALLINT, 432 TokenType.TINYINT: TokenType.UTINYINT, 433 TokenType.DECIMAL: TokenType.UDECIMAL, 434 TokenType.DOUBLE: TokenType.UDOUBLE, 435 } 436 437 SUBQUERY_PREDICATES = { 438 TokenType.ANY: exp.Any, 439 TokenType.ALL: exp.All, 440 TokenType.EXISTS: exp.Exists, 441 TokenType.SOME: exp.Any, 442 } 443 444 RESERVED_TOKENS = { 445 *Tokenizer.SINGLE_TOKENS.values(), 446 TokenType.SELECT, 447 } - {TokenType.IDENTIFIER} 448 449 DB_CREATABLES = { 450 TokenType.DATABASE, 451 TokenType.DICTIONARY, 452 TokenType.FILE_FORMAT, 453 TokenType.MODEL, 454 TokenType.NAMESPACE, 455 TokenType.SCHEMA, 456 TokenType.SEMANTIC_VIEW, 457 TokenType.SEQUENCE, 458 TokenType.SINK, 459 TokenType.SOURCE, 460 TokenType.STAGE, 461 TokenType.STORAGE_INTEGRATION, 462 TokenType.STREAMLIT, 463 TokenType.TABLE, 464 TokenType.TAG, 465 TokenType.VIEW, 466 TokenType.WAREHOUSE, 467 } 468 469 CREATABLES = { 470 TokenType.COLUMN, 471 TokenType.CONSTRAINT, 472 TokenType.FOREIGN_KEY, 473 TokenType.FUNCTION, 474 TokenType.INDEX, 475 TokenType.PROCEDURE, 476 *DB_CREATABLES, 477 } 478 479 ALTERABLES = { 480 TokenType.INDEX, 481 TokenType.TABLE, 482 TokenType.VIEW, 483 TokenType.SESSION, 484 } 485 486 # Tokens that can represent identifiers 487 ID_VAR_TOKENS = { 488 TokenType.ALL, 489 TokenType.ATTACH, 490 TokenType.VAR, 491 TokenType.ANTI, 492 TokenType.APPLY, 493 TokenType.ASC, 494 TokenType.ASOF, 495 TokenType.AUTO_INCREMENT, 496 TokenType.BEGIN, 497 TokenType.BPCHAR, 498 TokenType.CACHE, 499 TokenType.CASE, 500 TokenType.COLLATE, 501 TokenType.COMMAND, 502 TokenType.COMMENT, 503 TokenType.COMMIT, 504 TokenType.CONSTRAINT, 505 TokenType.COPY, 506 TokenType.CUBE, 507 TokenType.CURRENT_SCHEMA, 508 TokenType.DEFAULT, 509 TokenType.DELETE, 510 TokenType.DESC, 511 TokenType.DESCRIBE, 512 TokenType.DETACH, 513 TokenType.DICTIONARY, 514 TokenType.DIV, 515 TokenType.END, 516 TokenType.EXECUTE, 517 TokenType.EXPORT, 518 TokenType.ESCAPE, 519 TokenType.FALSE, 520 TokenType.FIRST, 521 TokenType.FILTER, 522 TokenType.FINAL, 523 TokenType.FORMAT, 524 TokenType.FULL, 525 TokenType.GET, 526 TokenType.IDENTIFIER, 527 TokenType.IS, 528 TokenType.ISNULL, 529 TokenType.INTERVAL, 530 TokenType.KEEP, 531 TokenType.KILL, 532 TokenType.LEFT, 533 TokenType.LIMIT, 534 TokenType.LOAD, 535 TokenType.MERGE, 536 TokenType.NATURAL, 537 TokenType.NEXT, 538 TokenType.OFFSET, 539 TokenType.OPERATOR, 540 TokenType.ORDINALITY, 541 TokenType.OVERLAPS, 542 TokenType.OVERWRITE, 543 TokenType.PARTITION, 544 TokenType.PERCENT, 545 TokenType.PIVOT, 546 TokenType.PRAGMA, 547 TokenType.PUT, 548 TokenType.RANGE, 549 TokenType.RECURSIVE, 550 TokenType.REFERENCES, 551 TokenType.REFRESH, 552 TokenType.RENAME, 553 TokenType.REPLACE, 554 TokenType.RIGHT, 555 TokenType.ROLLUP, 556 TokenType.ROW, 557 TokenType.ROWS, 558 TokenType.SEMI, 559 TokenType.SET, 560 TokenType.SETTINGS, 561 TokenType.SHOW, 562 TokenType.TEMPORARY, 563 TokenType.TOP, 564 TokenType.TRUE, 565 
TokenType.TRUNCATE, 566 TokenType.UNIQUE, 567 TokenType.UNNEST, 568 TokenType.UNPIVOT, 569 TokenType.UPDATE, 570 TokenType.USE, 571 TokenType.VOLATILE, 572 TokenType.WINDOW, 573 *CREATABLES, 574 *SUBQUERY_PREDICATES, 575 *TYPE_TOKENS, 576 *NO_PAREN_FUNCTIONS, 577 } 578 ID_VAR_TOKENS.remove(TokenType.UNION) 579 580 TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - { 581 TokenType.ANTI, 582 TokenType.ASOF, 583 TokenType.FULL, 584 TokenType.LEFT, 585 TokenType.LOCK, 586 TokenType.NATURAL, 587 TokenType.RIGHT, 588 TokenType.SEMI, 589 TokenType.WINDOW, 590 } 591 592 ALIAS_TOKENS = ID_VAR_TOKENS 593 594 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS 595 596 ARRAY_CONSTRUCTORS = { 597 "ARRAY": exp.Array, 598 "LIST": exp.List, 599 } 600 601 COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS} 602 603 UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET} 604 605 TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"} 606 607 FUNC_TOKENS = { 608 TokenType.COLLATE, 609 TokenType.COMMAND, 610 TokenType.CURRENT_DATE, 611 TokenType.CURRENT_DATETIME, 612 TokenType.CURRENT_SCHEMA, 613 TokenType.CURRENT_TIMESTAMP, 614 TokenType.CURRENT_TIME, 615 TokenType.CURRENT_USER, 616 TokenType.FILTER, 617 TokenType.FIRST, 618 TokenType.FORMAT, 619 TokenType.GET, 620 TokenType.GLOB, 621 TokenType.IDENTIFIER, 622 TokenType.INDEX, 623 TokenType.ISNULL, 624 TokenType.ILIKE, 625 TokenType.INSERT, 626 TokenType.LIKE, 627 TokenType.MERGE, 628 TokenType.NEXT, 629 TokenType.OFFSET, 630 TokenType.PRIMARY_KEY, 631 TokenType.RANGE, 632 TokenType.REPLACE, 633 TokenType.RLIKE, 634 TokenType.ROW, 635 TokenType.UNNEST, 636 TokenType.VAR, 637 TokenType.LEFT, 638 TokenType.RIGHT, 639 TokenType.SEQUENCE, 640 TokenType.DATE, 641 TokenType.DATETIME, 642 TokenType.TABLE, 643 TokenType.TIMESTAMP, 644 TokenType.TIMESTAMPTZ, 645 TokenType.TRUNCATE, 646 TokenType.UTC_DATE, 647 TokenType.UTC_TIME, 648 TokenType.UTC_TIMESTAMP, 649 TokenType.WINDOW, 650 TokenType.XOR, 651 *TYPE_TOKENS, 652 *SUBQUERY_PREDICATES, 653 } 654 655 CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 656 TokenType.AND: exp.And, 657 } 658 659 ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = { 660 TokenType.COLON_EQ: exp.PropertyEQ, 661 } 662 663 DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = { 664 TokenType.OR: exp.Or, 665 } 666 667 EQUALITY = { 668 TokenType.EQ: exp.EQ, 669 TokenType.NEQ: exp.NEQ, 670 TokenType.NULLSAFE_EQ: exp.NullSafeEQ, 671 } 672 673 COMPARISON = { 674 TokenType.GT: exp.GT, 675 TokenType.GTE: exp.GTE, 676 TokenType.LT: exp.LT, 677 TokenType.LTE: exp.LTE, 678 } 679 680 BITWISE = { 681 TokenType.AMP: exp.BitwiseAnd, 682 TokenType.CARET: exp.BitwiseXor, 683 TokenType.PIPE: exp.BitwiseOr, 684 } 685 686 TERM = { 687 TokenType.DASH: exp.Sub, 688 TokenType.PLUS: exp.Add, 689 TokenType.MOD: exp.Mod, 690 TokenType.COLLATE: exp.Collate, 691 } 692 693 FACTOR = { 694 TokenType.DIV: exp.IntDiv, 695 TokenType.LR_ARROW: exp.Distance, 696 TokenType.SLASH: exp.Div, 697 TokenType.STAR: exp.Mul, 698 } 699 700 EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {} 701 702 TIMES = { 703 TokenType.TIME, 704 TokenType.TIMETZ, 705 } 706 707 TIMESTAMPS = { 708 TokenType.TIMESTAMP, 709 TokenType.TIMESTAMPNTZ, 710 TokenType.TIMESTAMPTZ, 711 TokenType.TIMESTAMPLTZ, 712 *TIMES, 713 } 714 715 SET_OPERATIONS = { 716 TokenType.UNION, 717 TokenType.INTERSECT, 718 TokenType.EXCEPT, 719 } 720 721 JOIN_METHODS = { 722 TokenType.ASOF, 723 TokenType.NATURAL, 724 TokenType.POSITIONAL, 725 } 726 727 JOIN_SIDES = { 728 TokenType.LEFT, 729 TokenType.RIGHT, 730 TokenType.FULL, 
731 } 732 733 JOIN_KINDS = { 734 TokenType.ANTI, 735 TokenType.CROSS, 736 TokenType.INNER, 737 TokenType.OUTER, 738 TokenType.SEMI, 739 TokenType.STRAIGHT_JOIN, 740 } 741 742 JOIN_HINTS: t.Set[str] = set() 743 744 LAMBDAS = { 745 TokenType.ARROW: lambda self, expressions: self.expression( 746 exp.Lambda, 747 this=self._replace_lambda( 748 self._parse_assignment(), 749 expressions, 750 ), 751 expressions=expressions, 752 ), 753 TokenType.FARROW: lambda self, expressions: self.expression( 754 exp.Kwarg, 755 this=exp.var(expressions[0].name), 756 expression=self._parse_assignment(), 757 ), 758 } 759 760 COLUMN_OPERATORS = { 761 TokenType.DOT: None, 762 TokenType.DOTCOLON: lambda self, this, to: self.expression( 763 exp.JSONCast, 764 this=this, 765 to=to, 766 ), 767 TokenType.DCOLON: lambda self, this, to: self.build_cast( 768 strict=self.STRICT_CAST, this=this, to=to 769 ), 770 TokenType.ARROW: lambda self, this, path: self.expression( 771 exp.JSONExtract, 772 this=this, 773 expression=self.dialect.to_json_path(path), 774 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 775 ), 776 TokenType.DARROW: lambda self, this, path: self.expression( 777 exp.JSONExtractScalar, 778 this=this, 779 expression=self.dialect.to_json_path(path), 780 only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE, 781 ), 782 TokenType.HASH_ARROW: lambda self, this, path: self.expression( 783 exp.JSONBExtract, 784 this=this, 785 expression=path, 786 ), 787 TokenType.DHASH_ARROW: lambda self, this, path: self.expression( 788 exp.JSONBExtractScalar, 789 this=this, 790 expression=path, 791 ), 792 TokenType.PLACEHOLDER: lambda self, this, key: self.expression( 793 exp.JSONBContains, 794 this=this, 795 expression=key, 796 ), 797 } 798 799 CAST_COLUMN_OPERATORS = { 800 TokenType.DOTCOLON, 801 TokenType.DCOLON, 802 } 803 804 EXPRESSION_PARSERS = { 805 exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 806 exp.Column: lambda self: self._parse_column(), 807 exp.Condition: lambda self: self._parse_assignment(), 808 exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True), 809 exp.Expression: lambda self: self._parse_expression(), 810 exp.From: lambda self: self._parse_from(joins=True), 811 exp.Group: lambda self: self._parse_group(), 812 exp.Having: lambda self: self._parse_having(), 813 exp.Hint: lambda self: self._parse_hint_body(), 814 exp.Identifier: lambda self: self._parse_id_var(), 815 exp.Join: lambda self: self._parse_join(), 816 exp.Lambda: lambda self: self._parse_lambda(), 817 exp.Lateral: lambda self: self._parse_lateral(), 818 exp.Limit: lambda self: self._parse_limit(), 819 exp.Offset: lambda self: self._parse_offset(), 820 exp.Order: lambda self: self._parse_order(), 821 exp.Ordered: lambda self: self._parse_ordered(), 822 exp.Properties: lambda self: self._parse_properties(), 823 exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(), 824 exp.Qualify: lambda self: self._parse_qualify(), 825 exp.Returning: lambda self: self._parse_returning(), 826 exp.Select: lambda self: self._parse_select(), 827 exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY), 828 exp.Table: lambda self: self._parse_table_parts(), 829 exp.TableAlias: lambda self: self._parse_table_alias(), 830 exp.Tuple: lambda self: self._parse_value(values=False), 831 exp.Whens: lambda self: self._parse_when_matched(), 832 exp.Where: lambda self: self._parse_where(), 833 exp.Window: lambda self: self._parse_named_window(), 834 exp.With: lambda self: self._parse_with(), 835 
"JOIN_TYPE": lambda self: self._parse_join_parts(), 836 } 837 838 STATEMENT_PARSERS = { 839 TokenType.ALTER: lambda self: self._parse_alter(), 840 TokenType.ANALYZE: lambda self: self._parse_analyze(), 841 TokenType.BEGIN: lambda self: self._parse_transaction(), 842 TokenType.CACHE: lambda self: self._parse_cache(), 843 TokenType.COMMENT: lambda self: self._parse_comment(), 844 TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(), 845 TokenType.COPY: lambda self: self._parse_copy(), 846 TokenType.CREATE: lambda self: self._parse_create(), 847 TokenType.DELETE: lambda self: self._parse_delete(), 848 TokenType.DESC: lambda self: self._parse_describe(), 849 TokenType.DESCRIBE: lambda self: self._parse_describe(), 850 TokenType.DROP: lambda self: self._parse_drop(), 851 TokenType.GRANT: lambda self: self._parse_grant(), 852 TokenType.REVOKE: lambda self: self._parse_revoke(), 853 TokenType.INSERT: lambda self: self._parse_insert(), 854 TokenType.KILL: lambda self: self._parse_kill(), 855 TokenType.LOAD: lambda self: self._parse_load(), 856 TokenType.MERGE: lambda self: self._parse_merge(), 857 TokenType.PIVOT: lambda self: self._parse_simplified_pivot(), 858 TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()), 859 TokenType.REFRESH: lambda self: self._parse_refresh(), 860 TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(), 861 TokenType.SET: lambda self: self._parse_set(), 862 TokenType.TRUNCATE: lambda self: self._parse_truncate_table(), 863 TokenType.UNCACHE: lambda self: self._parse_uncache(), 864 TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True), 865 TokenType.UPDATE: lambda self: self._parse_update(), 866 TokenType.USE: lambda self: self._parse_use(), 867 TokenType.SEMICOLON: lambda self: exp.Semicolon(), 868 } 869 870 UNARY_PARSERS = { 871 TokenType.PLUS: lambda self: self._parse_unary(), # Unary + is handled as a no-op 872 TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()), 873 TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()), 874 TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()), 875 TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()), 876 TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()), 877 } 878 879 STRING_PARSERS = { 880 TokenType.HEREDOC_STRING: lambda self, token: self.expression( 881 exp.RawString, this=token.text 882 ), 883 TokenType.NATIONAL_STRING: lambda self, token: self.expression( 884 exp.National, this=token.text 885 ), 886 TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text), 887 TokenType.STRING: lambda self, token: self.expression( 888 exp.Literal, this=token.text, is_string=True 889 ), 890 TokenType.UNICODE_STRING: lambda self, token: self.expression( 891 exp.UnicodeString, 892 this=token.text, 893 escape=self._match_text_seq("UESCAPE") and self._parse_string(), 894 ), 895 } 896 897 NUMERIC_PARSERS = { 898 TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text), 899 TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text), 900 TokenType.HEX_STRING: lambda self, token: self.expression( 901 exp.HexString, 902 this=token.text, 903 is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None, 904 ), 905 TokenType.NUMBER: lambda self, token: self.expression( 906 exp.Literal, this=token.text, is_string=False 
907 ), 908 } 909 910 PRIMARY_PARSERS = { 911 **STRING_PARSERS, 912 **NUMERIC_PARSERS, 913 TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token), 914 TokenType.NULL: lambda self, _: self.expression(exp.Null), 915 TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True), 916 TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False), 917 TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(), 918 TokenType.STAR: lambda self, _: self._parse_star_ops(), 919 } 920 921 PLACEHOLDER_PARSERS = { 922 TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder), 923 TokenType.PARAMETER: lambda self: self._parse_parameter(), 924 TokenType.COLON: lambda self: ( 925 self.expression(exp.Placeholder, this=self._prev.text) 926 if self._match_set(self.COLON_PLACEHOLDER_TOKENS) 927 else None 928 ), 929 } 930 931 RANGE_PARSERS = { 932 TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll), 933 TokenType.BETWEEN: lambda self, this: self._parse_between(this), 934 TokenType.GLOB: binary_range_parser(exp.Glob), 935 TokenType.ILIKE: binary_range_parser(exp.ILike), 936 TokenType.IN: lambda self, this: self._parse_in(this), 937 TokenType.IRLIKE: binary_range_parser(exp.RegexpILike), 938 TokenType.IS: lambda self, this: self._parse_is(this), 939 TokenType.LIKE: binary_range_parser(exp.Like), 940 TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True), 941 TokenType.OVERLAPS: binary_range_parser(exp.Overlaps), 942 TokenType.RLIKE: binary_range_parser(exp.RegexpLike), 943 TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo), 944 TokenType.FOR: lambda self, this: self._parse_comprehension(this), 945 } 946 947 PIPE_SYNTAX_TRANSFORM_PARSERS = { 948 "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query), 949 "AS": lambda self, query: self._build_pipe_cte( 950 query, [exp.Star()], self._parse_table_alias() 951 ), 952 "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query), 953 "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query), 954 "ORDER BY": lambda self, query: query.order_by( 955 self._parse_order(), append=False, copy=False 956 ), 957 "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 958 "SELECT": lambda self, query: self._parse_pipe_syntax_select(query), 959 "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query), 960 "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query), 961 "WHERE": lambda self, query: query.where(self._parse_where(), copy=False), 962 } 963 964 PROPERTY_PARSERS: t.Dict[str, t.Callable] = { 965 "ALLOWED_VALUES": lambda self: self.expression( 966 exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary) 967 ), 968 "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty), 969 "AUTO": lambda self: self._parse_auto_property(), 970 "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty), 971 "BACKUP": lambda self: self.expression( 972 exp.BackupProperty, this=self._parse_var(any_token=True) 973 ), 974 "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(), 975 "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs), 976 "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs), 977 "CHECKSUM": lambda self: self._parse_checksum(), 978 "CLUSTER BY": lambda self: self._parse_cluster(), 979 "CLUSTERED": lambda self: self._parse_clustered_by(), 980 "COLLATE": lambda self, **kwargs: 
self._parse_property_assignment( 981 exp.CollateProperty, **kwargs 982 ), 983 "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty), 984 "CONTAINS": lambda self: self._parse_contains_property(), 985 "COPY": lambda self: self._parse_copy_property(), 986 "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs), 987 "DATA_DELETION": lambda self: self._parse_data_deletion_property(), 988 "DEFINER": lambda self: self._parse_definer(), 989 "DETERMINISTIC": lambda self: self.expression( 990 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 991 ), 992 "DISTRIBUTED": lambda self: self._parse_distributed_property(), 993 "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty), 994 "DYNAMIC": lambda self: self.expression(exp.DynamicProperty), 995 "DISTKEY": lambda self: self._parse_distkey(), 996 "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty), 997 "EMPTY": lambda self: self.expression(exp.EmptyProperty), 998 "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty), 999 "ENVIRONMENT": lambda self: self.expression( 1000 exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment) 1001 ), 1002 "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty), 1003 "EXTERNAL": lambda self: self.expression(exp.ExternalProperty), 1004 "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs), 1005 "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1006 "FREESPACE": lambda self: self._parse_freespace(), 1007 "GLOBAL": lambda self: self.expression(exp.GlobalProperty), 1008 "HEAP": lambda self: self.expression(exp.HeapProperty), 1009 "ICEBERG": lambda self: self.expression(exp.IcebergProperty), 1010 "IMMUTABLE": lambda self: self.expression( 1011 exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE") 1012 ), 1013 "INHERITS": lambda self: self.expression( 1014 exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table) 1015 ), 1016 "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()), 1017 "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs), 1018 "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty), 1019 "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"), 1020 "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"), 1021 "LIKE": lambda self: self._parse_create_like(), 1022 "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty), 1023 "LOCK": lambda self: self._parse_locking(), 1024 "LOCKING": lambda self: self._parse_locking(), 1025 "LOG": lambda self, **kwargs: self._parse_log(**kwargs), 1026 "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty), 1027 "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs), 1028 "MODIFIES": lambda self: self._parse_modifies_property(), 1029 "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True), 1030 "NO": lambda self: self._parse_no_property(), 1031 "ON": lambda self: self._parse_on_property(), 1032 "ORDER BY": lambda self: self._parse_order(skip_order_token=True), 1033 "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()), 1034 "PARTITION": lambda self: self._parse_partitioned_of(), 1035 "PARTITION BY": lambda self: self._parse_partitioned_by(), 1036 "PARTITIONED BY": lambda self: self._parse_partitioned_by(), 1037 "PARTITIONED_BY": 
lambda self: self._parse_partitioned_by(), 1038 "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True), 1039 "RANGE": lambda self: self._parse_dict_range(this="RANGE"), 1040 "READS": lambda self: self._parse_reads_property(), 1041 "REMOTE": lambda self: self._parse_remote_with_connection(), 1042 "RETURNS": lambda self: self._parse_returns(), 1043 "STRICT": lambda self: self.expression(exp.StrictProperty), 1044 "STREAMING": lambda self: self.expression(exp.StreamingTableProperty), 1045 "ROW": lambda self: self._parse_row(), 1046 "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty), 1047 "SAMPLE": lambda self: self.expression( 1048 exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise() 1049 ), 1050 "SECURE": lambda self: self.expression(exp.SecureProperty), 1051 "SECURITY": lambda self: self._parse_security(), 1052 "SET": lambda self: self.expression(exp.SetProperty, multi=False), 1053 "SETTINGS": lambda self: self._parse_settings_property(), 1054 "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty), 1055 "SORTKEY": lambda self: self._parse_sortkey(), 1056 "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"), 1057 "STABLE": lambda self: self.expression( 1058 exp.StabilityProperty, this=exp.Literal.string("STABLE") 1059 ), 1060 "STORED": lambda self: self._parse_stored(), 1061 "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(), 1062 "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(), 1063 "TEMP": lambda self: self.expression(exp.TemporaryProperty), 1064 "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty), 1065 "TO": lambda self: self._parse_to_table(), 1066 "TRANSIENT": lambda self: self.expression(exp.TransientProperty), 1067 "TRANSFORM": lambda self: self.expression( 1068 exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression) 1069 ), 1070 "TTL": lambda self: self._parse_ttl(), 1071 "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty), 1072 "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty), 1073 "VOLATILE": lambda self: self._parse_volatile_property(), 1074 "WITH": lambda self: self._parse_with_property(), 1075 } 1076 1077 CONSTRAINT_PARSERS = { 1078 "AUTOINCREMENT": lambda self: self._parse_auto_increment(), 1079 "AUTO_INCREMENT": lambda self: self._parse_auto_increment(), 1080 "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False), 1081 "CHARACTER SET": lambda self: self.expression( 1082 exp.CharacterSetColumnConstraint, this=self._parse_var_or_string() 1083 ), 1084 "CHECK": lambda self: self.expression( 1085 exp.CheckColumnConstraint, 1086 this=self._parse_wrapped(self._parse_assignment), 1087 enforced=self._match_text_seq("ENFORCED"), 1088 ), 1089 "COLLATE": lambda self: self.expression( 1090 exp.CollateColumnConstraint, 1091 this=self._parse_identifier() or self._parse_column(), 1092 ), 1093 "COMMENT": lambda self: self.expression( 1094 exp.CommentColumnConstraint, this=self._parse_string() 1095 ), 1096 "COMPRESS": lambda self: self._parse_compress(), 1097 "CLUSTERED": lambda self: self.expression( 1098 exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1099 ), 1100 "NONCLUSTERED": lambda self: self.expression( 1101 exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered) 1102 ), 1103 "DEFAULT": lambda self: self.expression( 1104 exp.DefaultColumnConstraint, 
this=self._parse_bitwise() 1105 ), 1106 "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()), 1107 "EPHEMERAL": lambda self: self.expression( 1108 exp.EphemeralColumnConstraint, this=self._parse_bitwise() 1109 ), 1110 "EXCLUDE": lambda self: self.expression( 1111 exp.ExcludeColumnConstraint, this=self._parse_index_params() 1112 ), 1113 "FOREIGN KEY": lambda self: self._parse_foreign_key(), 1114 "FORMAT": lambda self: self.expression( 1115 exp.DateFormatColumnConstraint, this=self._parse_var_or_string() 1116 ), 1117 "GENERATED": lambda self: self._parse_generated_as_identity(), 1118 "IDENTITY": lambda self: self._parse_auto_increment(), 1119 "INLINE": lambda self: self._parse_inline(), 1120 "LIKE": lambda self: self._parse_create_like(), 1121 "NOT": lambda self: self._parse_not_constraint(), 1122 "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True), 1123 "ON": lambda self: ( 1124 self._match(TokenType.UPDATE) 1125 and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function()) 1126 ) 1127 or self.expression(exp.OnProperty, this=self._parse_id_var()), 1128 "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()), 1129 "PERIOD": lambda self: self._parse_period_for_system_time(), 1130 "PRIMARY KEY": lambda self: self._parse_primary_key(), 1131 "REFERENCES": lambda self: self._parse_references(match=False), 1132 "TITLE": lambda self: self.expression( 1133 exp.TitleColumnConstraint, this=self._parse_var_or_string() 1134 ), 1135 "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]), 1136 "UNIQUE": lambda self: self._parse_unique(), 1137 "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint), 1138 "WATERMARK": lambda self: self.expression( 1139 exp.WatermarkColumnConstraint, 1140 this=self._match(TokenType.FOR) and self._parse_column(), 1141 expression=self._match(TokenType.ALIAS) and self._parse_disjunction(), 1142 ), 1143 "WITH": lambda self: self.expression( 1144 exp.Properties, expressions=self._parse_wrapped_properties() 1145 ), 1146 "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1147 "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(), 1148 } 1149 1150 def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]: 1151 if not self._match(TokenType.L_PAREN, advance=False): 1152 # Partitioning by bucket or truncate follows the syntax: 1153 # PARTITION BY (BUCKET(..) 
| TRUNCATE(..)) 1154 # If we don't have parenthesis after each keyword, we should instead parse this as an identifier 1155 self._retreat(self._index - 1) 1156 return None 1157 1158 klass = ( 1159 exp.PartitionedByBucket 1160 if self._prev.text.upper() == "BUCKET" 1161 else exp.PartitionByTruncate 1162 ) 1163 1164 args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column()) 1165 this, expression = seq_get(args, 0), seq_get(args, 1) 1166 1167 if isinstance(this, exp.Literal): 1168 # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order 1169 # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)` 1170 # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)` 1171 # Both variants are canonicalized in the latter i.e `bucket(<col name>, <num buckets>)` 1172 # 1173 # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning 1174 # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties 1175 this, expression = expression, this 1176 1177 return self.expression(klass, this=this, expression=expression) 1178 1179 ALTER_PARSERS = { 1180 "ADD": lambda self: self._parse_alter_table_add(), 1181 "AS": lambda self: self._parse_select(), 1182 "ALTER": lambda self: self._parse_alter_table_alter(), 1183 "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True), 1184 "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()), 1185 "DROP": lambda self: self._parse_alter_table_drop(), 1186 "RENAME": lambda self: self._parse_alter_table_rename(), 1187 "SET": lambda self: self._parse_alter_table_set(), 1188 "SWAP": lambda self: self.expression( 1189 exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True) 1190 ), 1191 } 1192 1193 ALTER_ALTER_PARSERS = { 1194 "DISTKEY": lambda self: self._parse_alter_diststyle(), 1195 "DISTSTYLE": lambda self: self._parse_alter_diststyle(), 1196 "SORTKEY": lambda self: self._parse_alter_sortkey(), 1197 "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True), 1198 } 1199 1200 SCHEMA_UNNAMED_CONSTRAINTS = { 1201 "CHECK", 1202 "EXCLUDE", 1203 "FOREIGN KEY", 1204 "LIKE", 1205 "PERIOD", 1206 "PRIMARY KEY", 1207 "UNIQUE", 1208 "WATERMARK", 1209 "BUCKET", 1210 "TRUNCATE", 1211 } 1212 1213 NO_PAREN_FUNCTION_PARSERS = { 1214 "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()), 1215 "CASE": lambda self: self._parse_case(), 1216 "CONNECT_BY_ROOT": lambda self: self.expression( 1217 exp.ConnectByRoot, this=self._parse_column() 1218 ), 1219 "IF": lambda self: self._parse_if(), 1220 } 1221 1222 INVALID_FUNC_NAME_TOKENS = { 1223 TokenType.IDENTIFIER, 1224 TokenType.STRING, 1225 } 1226 1227 FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"} 1228 1229 KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice) 1230 1231 FUNCTION_PARSERS = { 1232 **{ 1233 name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names() 1234 }, 1235 **{ 1236 name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names() 1237 }, 1238 "CAST": lambda self: self._parse_cast(self.STRICT_CAST), 1239 "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil), 1240 "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST), 1241 "DECODE": lambda self: self._parse_decode(), 1242 "EXTRACT": lambda self: self._parse_extract(), 1243 
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1244 "GAP_FILL": lambda self: self._parse_gap_fill(), 1245 "JSON_OBJECT": lambda self: self._parse_json_object(), 1246 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1247 "JSON_TABLE": lambda self: self._parse_json_table(), 1248 "MATCH": lambda self: self._parse_match_against(), 1249 "NORMALIZE": lambda self: self._parse_normalize(), 1250 "OPENJSON": lambda self: self._parse_open_json(), 1251 "OVERLAY": lambda self: self._parse_overlay(), 1252 "POSITION": lambda self: self._parse_position(), 1253 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1254 "STRING_AGG": lambda self: self._parse_string_agg(), 1255 "SUBSTRING": lambda self: self._parse_substring(), 1256 "TRIM": lambda self: self._parse_trim(), 1257 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1258 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1259 "XMLELEMENT": lambda self: self.expression( 1260 exp.XMLElement, 1261 this=self._match_text_seq("NAME") and self._parse_id_var(), 1262 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1263 ), 1264 "XMLTABLE": lambda self: self._parse_xml_table(), 1265 } 1266 1267 QUERY_MODIFIER_PARSERS = { 1268 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1269 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1270 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1271 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1272 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1273 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1274 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1275 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1276 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1277 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1278 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1279 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1280 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1281 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1282 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1283 TokenType.CLUSTER_BY: lambda self: ( 1284 "cluster", 1285 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1286 ), 1287 TokenType.DISTRIBUTE_BY: lambda self: ( 1288 "distribute", 1289 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1290 ), 1291 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1292 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1293 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1294 } 1295 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1296 1297 SET_PARSERS = { 1298 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1299 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1300 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1301 "TRANSACTION": lambda self: self._parse_set_transaction(), 1302 } 1303 1304 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1305 1306 TYPE_LITERAL_PARSERS = { 1307 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1308 } 1309 1310 TYPE_CONVERTERS: t.Dict[exp.DataType.Type, 
t.Callable[[exp.DataType], exp.DataType]] = {} 1311 1312 DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN} 1313 1314 PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE} 1315 1316 TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"} 1317 TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = { 1318 "ISOLATION": ( 1319 ("LEVEL", "REPEATABLE", "READ"), 1320 ("LEVEL", "READ", "COMMITTED"), 1321 ("LEVEL", "READ", "UNCOMITTED"), 1322 ("LEVEL", "SERIALIZABLE"), 1323 ), 1324 "READ": ("WRITE", "ONLY"), 1325 } 1326 1327 CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys( 1328 ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple() 1329 ) 1330 CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE") 1331 1332 CREATE_SEQUENCE: OPTIONS_TYPE = { 1333 "SCALE": ("EXTEND", "NOEXTEND"), 1334 "SHARD": ("EXTEND", "NOEXTEND"), 1335 "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"), 1336 **dict.fromkeys( 1337 ( 1338 "SESSION", 1339 "GLOBAL", 1340 "KEEP", 1341 "NOKEEP", 1342 "ORDER", 1343 "NOORDER", 1344 "NOCACHE", 1345 "CYCLE", 1346 "NOCYCLE", 1347 "NOMINVALUE", 1348 "NOMAXVALUE", 1349 "NOSCALE", 1350 "NOSHARD", 1351 ), 1352 tuple(), 1353 ), 1354 } 1355 1356 ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")} 1357 1358 USABLES: OPTIONS_TYPE = dict.fromkeys( 1359 ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple() 1360 ) 1361 1362 CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",)) 1363 1364 SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = { 1365 "TYPE": ("EVOLUTION",), 1366 **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()), 1367 } 1368 1369 PROCEDURE_OPTIONS: OPTIONS_TYPE = {} 1370 1371 EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple()) 1372 1373 KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = { 1374 "NOT": ("ENFORCED",), 1375 "MATCH": ( 1376 "FULL", 1377 "PARTIAL", 1378 "SIMPLE", 1379 ), 1380 "INITIALLY": ("DEFERRED", "IMMEDIATE"), 1381 "USING": ( 1382 "BTREE", 1383 "HASH", 1384 ), 1385 **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()), 1386 } 1387 1388 WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = { 1389 "NO": ("OTHERS",), 1390 "CURRENT": ("ROW",), 1391 **dict.fromkeys(("GROUP", "TIES"), tuple()), 1392 } 1393 1394 INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"} 1395 1396 CLONE_KEYWORDS = {"CLONE", "COPY"} 1397 HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"} 1398 HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"} 1399 1400 OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"} 1401 1402 OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN} 1403 1404 TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE} 1405 1406 VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"} 1407 1408 WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS} 1409 WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER} 1410 WINDOW_SIDES = {"FOLLOWING", "PRECEDING"} 1411 1412 JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS} 1413 1414 FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT} 1415 1416 ADD_CONSTRAINT_TOKENS = { 1417 TokenType.CONSTRAINT, 1418 TokenType.FOREIGN_KEY, 1419 TokenType.INDEX, 1420 TokenType.KEY, 1421 TokenType.PRIMARY_KEY, 1422 TokenType.UNIQUE, 1423 } 1424 1425 DISTINCT_TOKENS = {TokenType.DISTINCT} 1426 1427 NULL_TOKENS = {TokenType.NULL} 1428 1429 UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS 1430 1431 
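# Illustrative aside (not part of the module source): a concrete consequence of
# the precedence tables above (CONJUNCTION, DISJUNCTION, EQUALITY, COMPARISON,
# BITWISE, TERM, FACTOR) is that AND binds tighter than OR during parsing:
#
#     import sqlglot
#     from sqlglot import exp
#
#     where = sqlglot.parse_one("SELECT 1 FROM t WHERE a OR b AND c").find(exp.Where)
#     assert isinstance(where.this, exp.Or)             # a OR (b AND c)
#     assert isinstance(where.this.expression, exp.And)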
SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT} 1432 1433 COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"} 1434 1435 IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"} 1436 1437 ODBC_DATETIME_LITERALS = { 1438 "d": exp.Date, 1439 "t": exp.Time, 1440 "ts": exp.Timestamp, 1441 } 1442 1443 ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"} 1444 1445 PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN} 1446 1447 # The style options for the DESCRIBE statement 1448 DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"} 1449 1450 # The style options for the ANALYZE statement 1451 ANALYZE_STYLES = { 1452 "BUFFER_USAGE_LIMIT", 1453 "FULL", 1454 "LOCAL", 1455 "NO_WRITE_TO_BINLOG", 1456 "SAMPLE", 1457 "SKIP_LOCKED", 1458 "VERBOSE", 1459 } 1460 1461 ANALYZE_EXPRESSION_PARSERS = { 1462 "ALL": lambda self: self._parse_analyze_columns(), 1463 "COMPUTE": lambda self: self._parse_analyze_statistics(), 1464 "DELETE": lambda self: self._parse_analyze_delete(), 1465 "DROP": lambda self: self._parse_analyze_histogram(), 1466 "ESTIMATE": lambda self: self._parse_analyze_statistics(), 1467 "LIST": lambda self: self._parse_analyze_list(), 1468 "PREDICATE": lambda self: self._parse_analyze_columns(), 1469 "UPDATE": lambda self: self._parse_analyze_histogram(), 1470 "VALIDATE": lambda self: self._parse_analyze_validate(), 1471 } 1472 1473 PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"} 1474 1475 AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET) 1476 1477 OPERATION_MODIFIERS: t.Set[str] = set() 1478 1479 RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"} 1480 1481 MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows) 1482 1483 STRICT_CAST = True 1484 1485 PREFIXED_PIVOT_COLUMNS = False 1486 IDENTIFY_PIVOT_STRINGS = False 1487 1488 LOG_DEFAULTS_TO_LN = False 1489 1490 # Whether the table sample clause expects CSV syntax 1491 TABLESAMPLE_CSV = False 1492 1493 # The default method used for table sampling 1494 DEFAULT_SAMPLING_METHOD: t.Optional[str] = None 1495 1496 # Whether the SET command needs a delimiter (e.g. "=") for assignments 1497 SET_REQUIRES_ASSIGNMENT_DELIMITER = True 1498 1499 # Whether the TRIM function expects the characters to trim as its first argument 1500 TRIM_PATTERN_FIRST = False 1501 1502 # Whether string aliases are supported `SELECT COUNT(*) 'count'` 1503 STRING_ALIASES = False 1504 1505 # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand) 1506 MODIFIERS_ATTACHED_TO_SET_OP = True 1507 SET_OP_MODIFIERS = {"order", "limit", "offset"} 1508 1509 # Whether to parse IF statements that aren't followed by a left parenthesis as commands 1510 NO_PAREN_IF_COMMANDS = True 1511 1512 # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres) 1513 JSON_ARROWS_REQUIRE_JSON_TYPE = False 1514 1515 # Whether the `:` operator is used to extract a value from a VARIANT column 1516 COLON_IS_VARIANT_EXTRACT = False 1517 1518 # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause. 1519 # If this is True and '(' is not found, the keyword will be treated as an identifier 1520 VALUES_FOLLOWED_BY_PAREN = True 1521 1522 # Whether implicit unnesting is supported, e.g. 
SELECT 1 FROM y.z AS z, z.a (Redshift) 1523 SUPPORTS_IMPLICIT_UNNEST = False 1524 1525 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTH 1526 INTERVAL_SPANS = True 1527 1528 # Whether a PARTITION clause can follow a table reference 1529 SUPPORTS_PARTITION_SELECTION = False 1530 1531 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1532 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1533 1534 # Whether the 'AS' keyword is optional in the CTE definition syntax 1535 OPTIONAL_ALIAS_TOKEN_CTE = True 1536 1537 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1538 ALTER_RENAME_REQUIRES_COLUMN = True 1539 1540 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1541 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1542 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1543 # as BigQuery, where all joins have the same precedence. 1544 JOINS_HAVE_EQUAL_PRECEDENCE = False 1545 1546 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1547 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1548 1549 # Whether map literals support arbitrary expressions as keys. 1550 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1551 # When False, keys are typically restricted to identifiers. 1552 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1553 1554 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this 1555 # is true for Snowflake but not for BigQuery which can also process strings 1556 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1557 1558 __slots__ = ( 1559 "error_level", 1560 "error_message_context", 1561 "max_errors", 1562 "dialect", 1563 "sql", 1564 "errors", 1565 "_tokens", 1566 "_index", 1567 "_curr", 1568 "_next", 1569 "_prev", 1570 "_prev_comments", 1571 "_pipe_cte_counter", 1572 ) 1573 1574 # Autofilled 1575 SHOW_TRIE: t.Dict = {} 1576 SET_TRIE: t.Dict = {} 1577 1578 def __init__( 1579 self, 1580 error_level: t.Optional[ErrorLevel] = None, 1581 error_message_context: int = 100, 1582 max_errors: int = 3, 1583 dialect: DialectType = None, 1584 ): 1585 from sqlglot.dialects import Dialect 1586 1587 self.error_level = error_level or ErrorLevel.IMMEDIATE 1588 self.error_message_context = error_message_context 1589 self.max_errors = max_errors 1590 self.dialect = Dialect.get_or_raise(dialect) 1591 self.reset() 1592 1593 def reset(self): 1594 self.sql = "" 1595 self.errors = [] 1596 self._tokens = [] 1597 self._index = 0 1598 self._curr = None 1599 self._next = None 1600 self._prev = None 1601 self._prev_comments = None 1602 self._pipe_cte_counter = 0 1603 1604 def parse( 1605 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1606 ) -> t.List[t.Optional[exp.Expression]]: 1607 """ 1608 Parses a list of tokens and returns a list of syntax trees, one tree 1609 per parsed SQL statement. 1610 1611 Args: 1612 raw_tokens: The list of tokens. 1613 sql: The original SQL string, used to produce helpful debug messages. 1614 1615 Returns: 1616 The list of the produced syntax trees.
1617 """ 1618 return self._parse( 1619 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1620 ) 1621 1622 def parse_into( 1623 self, 1624 expression_types: exp.IntoType, 1625 raw_tokens: t.List[Token], 1626 sql: t.Optional[str] = None, 1627 ) -> t.List[t.Optional[exp.Expression]]: 1628 """ 1629 Parses a list of tokens into a given Expression type. If a collection of Expression 1630 types is given instead, this method will try to parse the token list into each one 1631 of them, stopping at the first for which the parsing succeeds. 1632 1633 Args: 1634 expression_types: The expression type(s) to try and parse the token list into. 1635 raw_tokens: The list of tokens. 1636 sql: The original SQL string, used to produce helpful debug messages. 1637 1638 Returns: 1639 The target Expression. 1640 """ 1641 errors = [] 1642 for expression_type in ensure_list(expression_types): 1643 parser = self.EXPRESSION_PARSERS.get(expression_type) 1644 if not parser: 1645 raise TypeError(f"No parser registered for {expression_type}") 1646 1647 try: 1648 return self._parse(parser, raw_tokens, sql) 1649 except ParseError as e: 1650 e.errors[0]["into_expression"] = expression_type 1651 errors.append(e) 1652 1653 raise ParseError( 1654 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1655 errors=merge_errors(errors), 1656 ) from errors[-1] 1657 1658 def _parse( 1659 self, 1660 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1661 raw_tokens: t.List[Token], 1662 sql: t.Optional[str] = None, 1663 ) -> t.List[t.Optional[exp.Expression]]: 1664 self.reset() 1665 self.sql = sql or "" 1666 1667 total = len(raw_tokens) 1668 chunks: t.List[t.List[Token]] = [[]] 1669 1670 for i, token in enumerate(raw_tokens): 1671 if token.token_type == TokenType.SEMICOLON: 1672 if token.comments: 1673 chunks.append([token]) 1674 1675 if i < total - 1: 1676 chunks.append([]) 1677 else: 1678 chunks[-1].append(token) 1679 1680 expressions = [] 1681 1682 for tokens in chunks: 1683 self._index = -1 1684 self._tokens = tokens 1685 self._advance() 1686 1687 expressions.append(parse_method(self)) 1688 1689 if self._index < len(self._tokens): 1690 self.raise_error("Invalid expression / Unexpected token") 1691 1692 self.check_errors() 1693 1694 return expressions 1695 1696 def check_errors(self) -> None: 1697 """Logs or raises any found errors, depending on the chosen error level setting.""" 1698 if self.error_level == ErrorLevel.WARN: 1699 for error in self.errors: 1700 logger.error(str(error)) 1701 elif self.error_level == ErrorLevel.RAISE and self.errors: 1702 raise ParseError( 1703 concat_messages(self.errors, self.max_errors), 1704 errors=merge_errors(self.errors), 1705 ) 1706 1707 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1708 """ 1709 Appends an error in the list of recorded errors or raises it, depending on the chosen 1710 error level setting. 1711 """ 1712 token = token or self._curr or self._prev or Token.string("") 1713 start = token.start 1714 end = token.end + 1 1715 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1716 highlight = self.sql[start:end] 1717 end_context = self.sql[end : end + self.error_message_context] 1718 1719 error = ParseError.new( 1720 f"{message}. 
Line {token.line}, Col: {token.col}.\n" 1721 f" {start_context}\033[4m{highlight}\033[0m{end_context}", 1722 description=message, 1723 line=token.line, 1724 col=token.col, 1725 start_context=start_context, 1726 highlight=highlight, 1727 end_context=end_context, 1728 ) 1729 1730 if self.error_level == ErrorLevel.IMMEDIATE: 1731 raise error 1732 1733 self.errors.append(error) 1734 1735 def expression( 1736 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1737 ) -> E: 1738 """ 1739 Creates a new, validated Expression. 1740 1741 Args: 1742 exp_class: The expression class to instantiate. 1743 comments: An optional list of comments to attach to the expression. 1744 kwargs: The arguments to set for the expression along with their respective values. 1745 1746 Returns: 1747 The target expression. 1748 """ 1749 instance = exp_class(**kwargs) 1750 instance.add_comments(comments) if comments else self._add_comments(instance) 1751 return self.validate_expression(instance) 1752 1753 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1754 if expression and self._prev_comments: 1755 expression.add_comments(self._prev_comments) 1756 self._prev_comments = None 1757 1758 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1759 """ 1760 Validates an Expression, making sure that all its mandatory arguments are set. 1761 1762 Args: 1763 expression: The expression to validate. 1764 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1765 1766 Returns: 1767 The validated expression. 1768 """ 1769 if self.error_level != ErrorLevel.IGNORE: 1770 for error_message in expression.error_messages(args): 1771 self.raise_error(error_message) 1772 1773 return expression 1774 1775 def _find_sql(self, start: Token, end: Token) -> str: 1776 return self.sql[start.start : end.end + 1] 1777 1778 def _is_connected(self) -> bool: 1779 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1780 1781 def _advance(self, times: int = 1) -> None: 1782 self._index += times 1783 self._curr = seq_get(self._tokens, self._index) 1784 self._next = seq_get(self._tokens, self._index + 1) 1785 1786 if self._index > 0: 1787 self._prev = self._tokens[self._index - 1] 1788 self._prev_comments = self._prev.comments 1789 else: 1790 self._prev = None 1791 self._prev_comments = None 1792 1793 def _retreat(self, index: int) -> None: 1794 if index != self._index: 1795 self._advance(index - self._index) 1796 1797 def _warn_unsupported(self) -> None: 1798 if len(self._tokens) <= 1: 1799 return 1800 1801 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1802 # interested in emitting a warning for the one currently being processed. 1803 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1804 1805 logger.warning( 1806 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1807 ) 1808 1809 def _parse_command(self) -> exp.Command: 1810 self._warn_unsupported() 1811 return self.expression( 1812 exp.Command, 1813 comments=self._prev_comments, 1814 this=self._prev.text.upper(), 1815 expression=self._parse_string(), 1816 ) 1817 1818 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1819 """ 1820 Attempts to backtrack if a parse function that contains a try/catch internally raises an error. 
1821 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1822 solve this by setting & resetting the parser state accordingly 1823 """ 1824 index = self._index 1825 error_level = self.error_level 1826 1827 self.error_level = ErrorLevel.IMMEDIATE 1828 try: 1829 this = parse_method() 1830 except ParseError: 1831 this = None 1832 finally: 1833 if not this or retreat: 1834 self._retreat(index) 1835 self.error_level = error_level 1836 1837 return this 1838 1839 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1840 start = self._prev 1841 exists = self._parse_exists() if allow_exists else None 1842 1843 self._match(TokenType.ON) 1844 1845 materialized = self._match_text_seq("MATERIALIZED") 1846 kind = self._match_set(self.CREATABLES) and self._prev 1847 if not kind: 1848 return self._parse_as_command(start) 1849 1850 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1851 this = self._parse_user_defined_function(kind=kind.token_type) 1852 elif kind.token_type == TokenType.TABLE: 1853 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1854 elif kind.token_type == TokenType.COLUMN: 1855 this = self._parse_column() 1856 else: 1857 this = self._parse_id_var() 1858 1859 self._match(TokenType.IS) 1860 1861 return self.expression( 1862 exp.Comment, 1863 this=this, 1864 kind=kind.text, 1865 expression=self._parse_string(), 1866 exists=exists, 1867 materialized=materialized, 1868 ) 1869 1870 def _parse_to_table( 1871 self, 1872 ) -> exp.ToTableProperty: 1873 table = self._parse_table_parts(schema=True) 1874 return self.expression(exp.ToTableProperty, this=table) 1875 1876 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1877 def _parse_ttl(self) -> exp.Expression: 1878 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1879 this = self._parse_bitwise() 1880 1881 if self._match_text_seq("DELETE"): 1882 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1883 if self._match_text_seq("RECOMPRESS"): 1884 return self.expression( 1885 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1886 ) 1887 if self._match_text_seq("TO", "DISK"): 1888 return self.expression( 1889 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1890 ) 1891 if self._match_text_seq("TO", "VOLUME"): 1892 return self.expression( 1893 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1894 ) 1895 1896 return this 1897 1898 expressions = self._parse_csv(_parse_ttl_action) 1899 where = self._parse_where() 1900 group = self._parse_group() 1901 1902 aggregates = None 1903 if group and self._match(TokenType.SET): 1904 aggregates = self._parse_csv(self._parse_set_item) 1905 1906 return self.expression( 1907 exp.MergeTreeTTL, 1908 expressions=expressions, 1909 where=where, 1910 group=group, 1911 aggregates=aggregates, 1912 ) 1913 1914 def _parse_statement(self) -> t.Optional[exp.Expression]: 1915 if self._curr is None: 1916 return None 1917 1918 if self._match_set(self.STATEMENT_PARSERS): 1919 comments = self._prev_comments 1920 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1921 stmt.add_comments(comments, prepend=True) 1922 return stmt 1923 1924 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1925 return self._parse_command() 1926 1927 expression = self._parse_expression() 1928 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1929 return 
self._parse_query_modifiers(expression) 1930 1931 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1932 start = self._prev 1933 temporary = self._match(TokenType.TEMPORARY) 1934 materialized = self._match_text_seq("MATERIALIZED") 1935 1936 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1937 if not kind: 1938 return self._parse_as_command(start) 1939 1940 concurrently = self._match_text_seq("CONCURRENTLY") 1941 if_exists = exists or self._parse_exists() 1942 1943 if kind == "COLUMN": 1944 this = self._parse_column() 1945 else: 1946 this = self._parse_table_parts( 1947 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1948 ) 1949 1950 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1951 1952 if self._match(TokenType.L_PAREN, advance=False): 1953 expressions = self._parse_wrapped_csv(self._parse_types) 1954 else: 1955 expressions = None 1956 1957 return self.expression( 1958 exp.Drop, 1959 exists=if_exists, 1960 this=this, 1961 expressions=expressions, 1962 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1963 temporary=temporary, 1964 materialized=materialized, 1965 cascade=self._match_text_seq("CASCADE"), 1966 constraints=self._match_text_seq("CONSTRAINTS"), 1967 purge=self._match_text_seq("PURGE"), 1968 cluster=cluster, 1969 concurrently=concurrently, 1970 ) 1971 1972 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1973 return ( 1974 self._match_text_seq("IF") 1975 and (not not_ or self._match(TokenType.NOT)) 1976 and self._match(TokenType.EXISTS) 1977 ) 1978 1979 def _parse_create(self) -> exp.Create | exp.Command: 1980 # Note: this can't be None because we've matched a statement parser 1981 start = self._prev 1982 1983 replace = ( 1984 start.token_type == TokenType.REPLACE 1985 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1986 or self._match_pair(TokenType.OR, TokenType.ALTER) 1987 ) 1988 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1989 1990 unique = self._match(TokenType.UNIQUE) 1991 1992 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1993 clustered = True 1994 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1995 "COLUMNSTORE" 1996 ): 1997 clustered = False 1998 else: 1999 clustered = None 2000 2001 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 2002 self._advance() 2003 2004 properties = None 2005 create_token = self._match_set(self.CREATABLES) and self._prev 2006 2007 if not create_token: 2008 # exp.Properties.Location.POST_CREATE 2009 properties = self._parse_properties() 2010 create_token = self._match_set(self.CREATABLES) and self._prev 2011 2012 if not properties or not create_token: 2013 return self._parse_as_command(start) 2014 2015 concurrently = self._match_text_seq("CONCURRENTLY") 2016 exists = self._parse_exists(not_=True) 2017 this = None 2018 expression: t.Optional[exp.Expression] = None 2019 indexes = None 2020 no_schema_binding = None 2021 begin = None 2022 end = None 2023 clone = None 2024 2025 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2026 nonlocal properties 2027 if properties and temp_props: 2028 properties.expressions.extend(temp_props.expressions) 2029 elif temp_props: 2030 properties = temp_props 2031 2032 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2033 this = self._parse_user_defined_function(kind=create_token.token_type) 2034 2035 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature) 2036 extend_props(self._parse_properties()) 2037 2038 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2039 extend_props(self._parse_properties()) 2040 2041 if not expression: 2042 if self._match(TokenType.COMMAND): 2043 expression = self._parse_as_command(self._prev) 2044 else: 2045 begin = self._match(TokenType.BEGIN) 2046 return_ = self._match_text_seq("RETURN") 2047 2048 if self._match(TokenType.STRING, advance=False): 2049 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2050 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2051 expression = self._parse_string() 2052 extend_props(self._parse_properties()) 2053 else: 2054 expression = self._parse_user_defined_function_expression() 2055 2056 end = self._match_text_seq("END") 2057 2058 if return_: 2059 expression = self.expression(exp.Return, this=expression) 2060 elif create_token.token_type == TokenType.INDEX: 2061 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2062 if not self._match(TokenType.ON): 2063 index = self._parse_id_var() 2064 anonymous = False 2065 else: 2066 index = None 2067 anonymous = True 2068 2069 this = self._parse_index(index=index, anonymous=anonymous) 2070 elif create_token.token_type in self.DB_CREATABLES: 2071 table_parts = self._parse_table_parts( 2072 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2073 ) 2074 2075 # exp.Properties.Location.POST_NAME 2076 self._match(TokenType.COMMA) 2077 extend_props(self._parse_properties(before=True)) 2078 2079 this = self._parse_schema(this=table_parts) 2080 2081 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2082 extend_props(self._parse_properties()) 2083 2084 has_alias = self._match(TokenType.ALIAS) 2085 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2086 # exp.Properties.Location.POST_ALIAS 2087 extend_props(self._parse_properties()) 2088 2089 if create_token.token_type == TokenType.SEQUENCE: 2090 expression = self._parse_types() 2091 props = self._parse_properties() 2092 if props: 2093 sequence_props = exp.SequenceProperties() 2094 options = [] 2095 for prop in props: 2096 if isinstance(prop, exp.SequenceProperties): 2097 for arg, value in prop.args.items(): 2098 if arg == "options": 2099 options.extend(value) 2100 else: 2101 sequence_props.set(arg, value) 2102 prop.pop() 2103 2104 if options: 2105 sequence_props.set("options", options) 2106 2107 props.append("expressions", sequence_props) 2108 extend_props(props) 2109 else: 2110 expression = self._parse_ddl_select() 2111 2112 # Some dialects also support using a table as an alias instead of a SELECT. 2113 # Here we fall back to this as an alternative. 
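# A hedged illustration of this fallback (the table names are made up): for DDL
# shaped like CREATE TABLE t1 AS t2, where no SELECT follows AS, _parse_ddl_select
# yields nothing and the _try_parse(self._parse_table_parts) call below captures t2
# as the expression instead; if that attempt also fails, _try_parse retreats to the
# saved token index and expression simply stays None.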
2114 if not expression and has_alias: 2115 expression = self._try_parse(self._parse_table_parts) 2116 2117 if create_token.token_type == TokenType.TABLE: 2118 # exp.Properties.Location.POST_EXPRESSION 2119 extend_props(self._parse_properties()) 2120 2121 indexes = [] 2122 while True: 2123 index = self._parse_index() 2124 2125 # exp.Properties.Location.POST_INDEX 2126 extend_props(self._parse_properties()) 2127 if not index: 2128 break 2129 else: 2130 self._match(TokenType.COMMA) 2131 indexes.append(index) 2132 elif create_token.token_type == TokenType.VIEW: 2133 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2134 no_schema_binding = True 2135 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2136 extend_props(self._parse_properties()) 2137 2138 shallow = self._match_text_seq("SHALLOW") 2139 2140 if self._match_texts(self.CLONE_KEYWORDS): 2141 copy = self._prev.text.lower() == "copy" 2142 clone = self.expression( 2143 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2144 ) 2145 2146 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2147 return self._parse_as_command(start) 2148 2149 create_kind_text = create_token.text.upper() 2150 return self.expression( 2151 exp.Create, 2152 this=this, 2153 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2154 replace=replace, 2155 refresh=refresh, 2156 unique=unique, 2157 expression=expression, 2158 exists=exists, 2159 properties=properties, 2160 indexes=indexes, 2161 no_schema_binding=no_schema_binding, 2162 begin=begin, 2163 end=end, 2164 clone=clone, 2165 concurrently=concurrently, 2166 clustered=clustered, 2167 ) 2168 2169 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2170 seq = exp.SequenceProperties() 2171 2172 options = [] 2173 index = self._index 2174 2175 while self._curr: 2176 self._match(TokenType.COMMA) 2177 if self._match_text_seq("INCREMENT"): 2178 self._match_text_seq("BY") 2179 self._match_text_seq("=") 2180 seq.set("increment", self._parse_term()) 2181 elif self._match_text_seq("MINVALUE"): 2182 seq.set("minvalue", self._parse_term()) 2183 elif self._match_text_seq("MAXVALUE"): 2184 seq.set("maxvalue", self._parse_term()) 2185 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2186 self._match_text_seq("=") 2187 seq.set("start", self._parse_term()) 2188 elif self._match_text_seq("CACHE"): 2189 # T-SQL allows empty CACHE which is initialized dynamically 2190 seq.set("cache", self._parse_number() or True) 2191 elif self._match_text_seq("OWNED", "BY"): 2192 # "OWNED BY NONE" is the default 2193 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2194 else: 2195 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2196 if opt: 2197 options.append(opt) 2198 else: 2199 break 2200 2201 seq.set("options", options if options else None) 2202 return None if self._index == index else seq 2203 2204 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2205 # only used for teradata currently 2206 self._match(TokenType.COMMA) 2207 2208 kwargs = { 2209 "no": self._match_text_seq("NO"), 2210 "dual": self._match_text_seq("DUAL"), 2211 "before": self._match_text_seq("BEFORE"), 2212 "default": self._match_text_seq("DEFAULT"), 2213 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2214 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2215 "after": self._match_text_seq("AFTER"), 2216 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2217 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2218 } 2219 2220 if self._match_texts(self.PROPERTY_PARSERS): 2221 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2222 try: 2223 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2224 except TypeError: 2225 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2226 2227 return None 2228 2229 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2230 return self._parse_wrapped_csv(self._parse_property) 2231 2232 def _parse_property(self) -> t.Optional[exp.Expression]: 2233 if self._match_texts(self.PROPERTY_PARSERS): 2234 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2235 2236 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2237 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2238 2239 if self._match_text_seq("COMPOUND", "SORTKEY"): 2240 return self._parse_sortkey(compound=True) 2241 2242 if self._match_text_seq("SQL", "SECURITY"): 2243 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2244 2245 index = self._index 2246 2247 seq_props = self._parse_sequence_properties() 2248 if seq_props: 2249 return seq_props 2250 2251 self._retreat(index) 2252 key = self._parse_column() 2253 2254 if not self._match(TokenType.EQ): 2255 self._retreat(index) 2256 return None 2257 2258 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2259 if isinstance(key, exp.Column): 2260 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2261 2262 value = self._parse_bitwise() or self._parse_var(any_token=True) 2263 2264 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2265 if isinstance(value, exp.Column): 2266 value = exp.var(value.name) 2267 2268 return self.expression(exp.Property, this=key, value=value) 2269 2270 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2271 if self._match_text_seq("BY"): 2272 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2273 2274 self._match(TokenType.ALIAS) 2275 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2276 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2277 2278 return self.expression( 2279 exp.FileFormatProperty, 2280 this=( 2281 self.expression( 2282 exp.InputOutputFormat, 2283 input_format=input_format, 2284 output_format=output_format, 2285 ) 2286 if input_format or output_format 2287 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2288 ), 2289 hive_format=True, 2290 ) 2291 2292 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2293 field = self._parse_field() 2294 if isinstance(field, exp.Identifier) and not field.quoted: 2295 field = exp.var(field) 2296 2297 return field 2298 2299 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2300 self._match(TokenType.EQ) 2301 self._match(TokenType.ALIAS) 2302 2303 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2304 2305 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2306 properties = [] 2307 while True: 2308 if before: 2309 prop = self._parse_property_before() 2310 else: 2311 prop = self._parse_property() 2312 if not prop: 2313 break 2314 for p in ensure_list(prop): 2315 properties.append(p) 
2316 2317 if properties: 2318 return self.expression(exp.Properties, expressions=properties) 2319 2320 return None 2321 2322 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2323 return self.expression( 2324 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2325 ) 2326 2327 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2328 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2329 security_specifier = self._prev.text.upper() 2330 return self.expression(exp.SecurityProperty, this=security_specifier) 2331 return None 2332 2333 def _parse_settings_property(self) -> exp.SettingsProperty: 2334 return self.expression( 2335 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2336 ) 2337 2338 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2339 if self._index >= 2: 2340 pre_volatile_token = self._tokens[self._index - 2] 2341 else: 2342 pre_volatile_token = None 2343 2344 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2345 return exp.VolatileProperty() 2346 2347 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2348 2349 def _parse_retention_period(self) -> exp.Var: 2350 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2351 number = self._parse_number() 2352 number_str = f"{number} " if number else "" 2353 unit = self._parse_var(any_token=True) 2354 return exp.var(f"{number_str}{unit}") 2355 2356 def _parse_system_versioning_property( 2357 self, with_: bool = False 2358 ) -> exp.WithSystemVersioningProperty: 2359 self._match(TokenType.EQ) 2360 prop = self.expression( 2361 exp.WithSystemVersioningProperty, 2362 **{ # type: ignore 2363 "on": True, 2364 "with": with_, 2365 }, 2366 ) 2367 2368 if self._match_text_seq("OFF"): 2369 prop.set("on", False) 2370 return prop 2371 2372 self._match(TokenType.ON) 2373 if self._match(TokenType.L_PAREN): 2374 while self._curr and not self._match(TokenType.R_PAREN): 2375 if self._match_text_seq("HISTORY_TABLE", "="): 2376 prop.set("this", self._parse_table_parts()) 2377 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2378 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2379 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2380 prop.set("retention_period", self._parse_retention_period()) 2381 2382 self._match(TokenType.COMMA) 2383 2384 return prop 2385 2386 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2387 self._match(TokenType.EQ) 2388 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2389 prop = self.expression(exp.DataDeletionProperty, on=on) 2390 2391 if self._match(TokenType.L_PAREN): 2392 while self._curr and not self._match(TokenType.R_PAREN): 2393 if self._match_text_seq("FILTER_COLUMN", "="): 2394 prop.set("filter_column", self._parse_column()) 2395 elif self._match_text_seq("RETENTION_PERIOD", "="): 2396 prop.set("retention_period", self._parse_retention_period()) 2397 2398 self._match(TokenType.COMMA) 2399 2400 return prop 2401 2402 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2403 kind = "HASH" 2404 expressions: t.Optional[t.List[exp.Expression]] = None 2405 if self._match_text_seq("BY", "HASH"): 2406 expressions = self._parse_wrapped_csv(self._parse_id_var) 2407 elif self._match_text_seq("BY", "RANDOM"): 2408 kind = "RANDOM" 2409 2410 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2411 
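# A hedged illustration (Doris-style DDL is assumed here): DISTRIBUTED BY HASH (id)
# BUCKETS 10 produces kind="HASH" with wrapped id vars and an explicit bucket count,
# while DISTRIBUTED BY RANDOM BUCKETS AUTO sets kind="RANDOM" and leaves `buckets`
# as None below.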
buckets: t.Optional[exp.Expression] = None 2412 if self._match_text_seq("BUCKETS") and not self._match_text_seq("AUTO"): 2413 buckets = self._parse_number() 2414 2415 return self.expression( 2416 exp.DistributedByProperty, 2417 expressions=expressions, 2418 kind=kind, 2419 buckets=buckets, 2420 order=self._parse_order(), 2421 ) 2422 2423 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2424 self._match_text_seq("KEY") 2425 expressions = self._parse_wrapped_id_vars() 2426 return self.expression(expr_type, expressions=expressions) 2427 2428 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2429 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2430 prop = self._parse_system_versioning_property(with_=True) 2431 self._match_r_paren() 2432 return prop 2433 2434 if self._match(TokenType.L_PAREN, advance=False): 2435 return self._parse_wrapped_properties() 2436 2437 if self._match_text_seq("JOURNAL"): 2438 return self._parse_withjournaltable() 2439 2440 if self._match_texts(self.VIEW_ATTRIBUTES): 2441 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2442 2443 if self._match_text_seq("DATA"): 2444 return self._parse_withdata(no=False) 2445 elif self._match_text_seq("NO", "DATA"): 2446 return self._parse_withdata(no=True) 2447 2448 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2449 return self._parse_serde_properties(with_=True) 2450 2451 if self._match(TokenType.SCHEMA): 2452 return self.expression( 2453 exp.WithSchemaBindingProperty, 2454 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2455 ) 2456 2457 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2458 return self.expression( 2459 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2460 ) 2461 2462 if not self._next: 2463 return None 2464 2465 return self._parse_withisolatedloading() 2466 2467 def _parse_procedure_option(self) -> exp.Expression | None: 2468 if self._match_text_seq("EXECUTE", "AS"): 2469 return self.expression( 2470 exp.ExecuteAsProperty, 2471 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2472 or self._parse_string(), 2473 ) 2474 2475 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2476 2477 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2478 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2479 self._match(TokenType.EQ) 2480 2481 user = self._parse_id_var() 2482 self._match(TokenType.PARAMETER) 2483 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2484 2485 if not user or not host: 2486 return None 2487 2488 return exp.DefinerProperty(this=f"{user}@{host}") 2489 2490 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2491 self._match(TokenType.TABLE) 2492 self._match(TokenType.EQ) 2493 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2494 2495 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2496 return self.expression(exp.LogProperty, no=no) 2497 2498 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2499 return self.expression(exp.JournalProperty, **kwargs) 2500 2501 def _parse_checksum(self) -> exp.ChecksumProperty: 2502 self._match(TokenType.EQ) 2503 2504 on = None 2505 if self._match(TokenType.ON): 2506 on = True 2507 elif self._match_text_seq("OFF"): 2508 on = False 2509 2510 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2511 2512 def 
_parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2513 return self.expression( 2514 exp.Cluster, 2515 expressions=( 2516 self._parse_wrapped_csv(self._parse_ordered) 2517 if wrapped 2518 else self._parse_csv(self._parse_ordered) 2519 ), 2520 ) 2521 2522 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2523 self._match_text_seq("BY") 2524 2525 self._match_l_paren() 2526 expressions = self._parse_csv(self._parse_column) 2527 self._match_r_paren() 2528 2529 if self._match_text_seq("SORTED", "BY"): 2530 self._match_l_paren() 2531 sorted_by = self._parse_csv(self._parse_ordered) 2532 self._match_r_paren() 2533 else: 2534 sorted_by = None 2535 2536 self._match(TokenType.INTO) 2537 buckets = self._parse_number() 2538 self._match_text_seq("BUCKETS") 2539 2540 return self.expression( 2541 exp.ClusteredByProperty, 2542 expressions=expressions, 2543 sorted_by=sorted_by, 2544 buckets=buckets, 2545 ) 2546 2547 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2548 if not self._match_text_seq("GRANTS"): 2549 self._retreat(self._index - 1) 2550 return None 2551 2552 return self.expression(exp.CopyGrantsProperty) 2553 2554 def _parse_freespace(self) -> exp.FreespaceProperty: 2555 self._match(TokenType.EQ) 2556 return self.expression( 2557 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2558 ) 2559 2560 def _parse_mergeblockratio( 2561 self, no: bool = False, default: bool = False 2562 ) -> exp.MergeBlockRatioProperty: 2563 if self._match(TokenType.EQ): 2564 return self.expression( 2565 exp.MergeBlockRatioProperty, 2566 this=self._parse_number(), 2567 percent=self._match(TokenType.PERCENT), 2568 ) 2569 2570 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2571 2572 def _parse_datablocksize( 2573 self, 2574 default: t.Optional[bool] = None, 2575 minimum: t.Optional[bool] = None, 2576 maximum: t.Optional[bool] = None, 2577 ) -> exp.DataBlocksizeProperty: 2578 self._match(TokenType.EQ) 2579 size = self._parse_number() 2580 2581 units = None 2582 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2583 units = self._prev.text 2584 2585 return self.expression( 2586 exp.DataBlocksizeProperty, 2587 size=size, 2588 units=units, 2589 default=default, 2590 minimum=minimum, 2591 maximum=maximum, 2592 ) 2593 2594 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2595 self._match(TokenType.EQ) 2596 always = self._match_text_seq("ALWAYS") 2597 manual = self._match_text_seq("MANUAL") 2598 never = self._match_text_seq("NEVER") 2599 default = self._match_text_seq("DEFAULT") 2600 2601 autotemp = None 2602 if self._match_text_seq("AUTOTEMP"): 2603 autotemp = self._parse_schema() 2604 2605 return self.expression( 2606 exp.BlockCompressionProperty, 2607 always=always, 2608 manual=manual, 2609 never=never, 2610 default=default, 2611 autotemp=autotemp, 2612 ) 2613 2614 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2615 index = self._index 2616 no = self._match_text_seq("NO") 2617 concurrent = self._match_text_seq("CONCURRENT") 2618 2619 if not self._match_text_seq("ISOLATED", "LOADING"): 2620 self._retreat(index) 2621 return None 2622 2623 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2624 return self.expression( 2625 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2626 ) 2627 2628 def _parse_locking(self) -> exp.LockingProperty: 2629 if self._match(TokenType.TABLE): 2630 kind = "TABLE" 2631 elif 
self._match(TokenType.VIEW): 2632 kind = "VIEW" 2633 elif self._match(TokenType.ROW): 2634 kind = "ROW" 2635 elif self._match_text_seq("DATABASE"): 2636 kind = "DATABASE" 2637 else: 2638 kind = None 2639 2640 if kind in ("DATABASE", "TABLE", "VIEW"): 2641 this = self._parse_table_parts() 2642 else: 2643 this = None 2644 2645 if self._match(TokenType.FOR): 2646 for_or_in = "FOR" 2647 elif self._match(TokenType.IN): 2648 for_or_in = "IN" 2649 else: 2650 for_or_in = None 2651 2652 if self._match_text_seq("ACCESS"): 2653 lock_type = "ACCESS" 2654 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2655 lock_type = "EXCLUSIVE" 2656 elif self._match_text_seq("SHARE"): 2657 lock_type = "SHARE" 2658 elif self._match_text_seq("READ"): 2659 lock_type = "READ" 2660 elif self._match_text_seq("WRITE"): 2661 lock_type = "WRITE" 2662 elif self._match_text_seq("CHECKSUM"): 2663 lock_type = "CHECKSUM" 2664 else: 2665 lock_type = None 2666 2667 override = self._match_text_seq("OVERRIDE") 2668 2669 return self.expression( 2670 exp.LockingProperty, 2671 this=this, 2672 kind=kind, 2673 for_or_in=for_or_in, 2674 lock_type=lock_type, 2675 override=override, 2676 ) 2677 2678 def _parse_partition_by(self) -> t.List[exp.Expression]: 2679 if self._match(TokenType.PARTITION_BY): 2680 return self._parse_csv(self._parse_assignment) 2681 return [] 2682 2683 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2684 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2685 if self._match_text_seq("MINVALUE"): 2686 return exp.var("MINVALUE") 2687 if self._match_text_seq("MAXVALUE"): 2688 return exp.var("MAXVALUE") 2689 return self._parse_bitwise() 2690 2691 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2692 expression = None 2693 from_expressions = None 2694 to_expressions = None 2695 2696 if self._match(TokenType.IN): 2697 this = self._parse_wrapped_csv(self._parse_bitwise) 2698 elif self._match(TokenType.FROM): 2699 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2700 self._match_text_seq("TO") 2701 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2702 elif self._match_text_seq("WITH", "(", "MODULUS"): 2703 this = self._parse_number() 2704 self._match_text_seq(",", "REMAINDER") 2705 expression = self._parse_number() 2706 self._match_r_paren() 2707 else: 2708 self.raise_error("Failed to parse partition bound spec.") 2709 2710 return self.expression( 2711 exp.PartitionBoundSpec, 2712 this=this, 2713 expression=expression, 2714 from_expressions=from_expressions, 2715 to_expressions=to_expressions, 2716 ) 2717 2718 # https://www.postgresql.org/docs/current/sql-createtable.html 2719 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2720 if not self._match_text_seq("OF"): 2721 self._retreat(self._index - 1) 2722 return None 2723 2724 this = self._parse_table(schema=True) 2725 2726 if self._match(TokenType.DEFAULT): 2727 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2728 elif self._match_text_seq("FOR", "VALUES"): 2729 expression = self._parse_partition_bound_spec() 2730 else: 2731 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2732 2733 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2734 2735 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2736 self._match(TokenType.EQ) 2737 return self.expression( 2738 exp.PartitionedByProperty, 2739 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2740 ) 2741 2742 def 
_parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2743 if self._match_text_seq("AND", "STATISTICS"): 2744 statistics = True 2745 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2746 statistics = False 2747 else: 2748 statistics = None 2749 2750 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2751 2752 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2753 if self._match_text_seq("SQL"): 2754 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2755 return None 2756 2757 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2758 if self._match_text_seq("SQL", "DATA"): 2759 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2760 return None 2761 2762 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2763 if self._match_text_seq("PRIMARY", "INDEX"): 2764 return exp.NoPrimaryIndexProperty() 2765 if self._match_text_seq("SQL"): 2766 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2767 return None 2768 2769 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2770 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2771 return exp.OnCommitProperty() 2772 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2773 return exp.OnCommitProperty(delete=True) 2774 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2775 2776 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2777 if self._match_text_seq("SQL", "DATA"): 2778 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2779 return None 2780 2781 def _parse_distkey(self) -> exp.DistKeyProperty: 2782 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2783 2784 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2785 table = self._parse_table(schema=True) 2786 2787 options = [] 2788 while self._match_texts(("INCLUDING", "EXCLUDING")): 2789 this = self._prev.text.upper() 2790 2791 id_var = self._parse_id_var() 2792 if not id_var: 2793 return None 2794 2795 options.append( 2796 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2797 ) 2798 2799 return self.expression(exp.LikeProperty, this=table, expressions=options) 2800 2801 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2802 return self.expression( 2803 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2804 ) 2805 2806 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2807 self._match(TokenType.EQ) 2808 return self.expression( 2809 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2810 ) 2811 2812 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2813 self._match_text_seq("WITH", "CONNECTION") 2814 return self.expression( 2815 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2816 ) 2817 2818 def _parse_returns(self) -> exp.ReturnsProperty: 2819 value: t.Optional[exp.Expression] 2820 null = None 2821 is_table = self._match(TokenType.TABLE) 2822 2823 if is_table: 2824 if self._match(TokenType.LT): 2825 value = self.expression( 2826 exp.Schema, 2827 this="TABLE", 2828 expressions=self._parse_csv(self._parse_struct_types), 2829 ) 2830 if not self._match(TokenType.GT): 2831 self.raise_error("Expecting >") 2832 else: 2833 value = self._parse_schema(exp.var("TABLE")) 2834 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 
2835 null = True 2836 value = None 2837 else: 2838 value = self._parse_types() 2839 2840 return self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2841 2842 def _parse_describe(self) -> exp.Describe: 2843 kind = self._match_set(self.CREATABLES) and self._prev.text 2844 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2845 if self._match(TokenType.DOT): 2846 style = None 2847 self._retreat(self._index - 2) 2848 2849 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2850 2851 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2852 this = self._parse_statement() 2853 else: 2854 this = self._parse_table(schema=True) 2855 2856 properties = self._parse_properties() 2857 expressions = properties.expressions if properties else None 2858 partition = self._parse_partition() 2859 return self.expression( 2860 exp.Describe, 2861 this=this, 2862 style=style, 2863 kind=kind, 2864 expressions=expressions, 2865 partition=partition, 2866 format=format, 2867 ) 2868 2869 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2870 kind = self._prev.text.upper() 2871 expressions = [] 2872 2873 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2874 if self._match(TokenType.WHEN): 2875 expression = self._parse_disjunction() 2876 self._match(TokenType.THEN) 2877 else: 2878 expression = None 2879 2880 else_ = self._match(TokenType.ELSE) 2881 2882 if not self._match(TokenType.INTO): 2883 return None 2884 2885 return self.expression( 2886 exp.ConditionalInsert, 2887 this=self.expression( 2888 exp.Insert, 2889 this=self._parse_table(schema=True), 2890 expression=self._parse_derived_table_values(), 2891 ), 2892 expression=expression, 2893 else_=else_, 2894 ) 2895 2896 expression = parse_conditional_insert() 2897 while expression is not None: 2898 expressions.append(expression) 2899 expression = parse_conditional_insert() 2900 2901 return self.expression( 2902 exp.MultitableInserts, 2903 kind=kind, 2904 comments=comments, 2905 expressions=expressions, 2906 source=self._parse_table(), 2907 ) 2908 2909 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2910 comments = [] 2911 hint = self._parse_hint() 2912 overwrite = self._match(TokenType.OVERWRITE) 2913 ignore = self._match(TokenType.IGNORE) 2914 local = self._match_text_seq("LOCAL") 2915 alternative = None 2916 is_function = None 2917 2918 if self._match_text_seq("DIRECTORY"): 2919 this: t.Optional[exp.Expression] = self.expression( 2920 exp.Directory, 2921 this=self._parse_var_or_string(), 2922 local=local, 2923 row_format=self._parse_row_format(match_row=True), 2924 ) 2925 else: 2926 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2927 comments += ensure_list(self._prev_comments) 2928 return self._parse_multitable_inserts(comments) 2929 2930 if self._match(TokenType.OR): 2931 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2932 2933 self._match(TokenType.INTO) 2934 comments += ensure_list(self._prev_comments) 2935 self._match(TokenType.TABLE) 2936 is_function = self._match(TokenType.FUNCTION) 2937 2938 this = ( 2939 self._parse_table(schema=True, parse_partition=True) 2940 if not is_function 2941 else self._parse_function() 2942 ) 2943 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2944 this.set("alias", self._parse_table_alias()) 2945 2946 returning = self._parse_returning() 2947 2948 return self.expression( 2949 
exp.Insert, 2950 comments=comments, 2951 hint=hint, 2952 is_function=is_function, 2953 this=this, 2954 stored=self._match_text_seq("STORED") and self._parse_stored(), 2955 by_name=self._match_text_seq("BY", "NAME"), 2956 exists=self._parse_exists(), 2957 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2958 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2959 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2960 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2961 conflict=self._parse_on_conflict(), 2962 returning=returning or self._parse_returning(), 2963 overwrite=overwrite, 2964 alternative=alternative, 2965 ignore=ignore, 2966 source=self._match(TokenType.TABLE) and self._parse_table(), 2967 ) 2968 2969 def _parse_kill(self) -> exp.Kill: 2970 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2971 2972 return self.expression( 2973 exp.Kill, 2974 this=self._parse_primary(), 2975 kind=kind, 2976 ) 2977 2978 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2979 conflict = self._match_text_seq("ON", "CONFLICT") 2980 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2981 2982 if not conflict and not duplicate: 2983 return None 2984 2985 conflict_keys = None 2986 constraint = None 2987 2988 if conflict: 2989 if self._match_text_seq("ON", "CONSTRAINT"): 2990 constraint = self._parse_id_var() 2991 elif self._match(TokenType.L_PAREN): 2992 conflict_keys = self._parse_csv(self._parse_id_var) 2993 self._match_r_paren() 2994 2995 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2996 if self._prev.token_type == TokenType.UPDATE: 2997 self._match(TokenType.SET) 2998 expressions = self._parse_csv(self._parse_equality) 2999 else: 3000 expressions = None 3001 3002 return self.expression( 3003 exp.OnConflict, 3004 duplicate=duplicate, 3005 expressions=expressions, 3006 action=action, 3007 conflict_keys=conflict_keys, 3008 constraint=constraint, 3009 where=self._parse_where(), 3010 ) 3011 3012 def _parse_returning(self) -> t.Optional[exp.Returning]: 3013 if not self._match(TokenType.RETURNING): 3014 return None 3015 return self.expression( 3016 exp.Returning, 3017 expressions=self._parse_csv(self._parse_expression), 3018 into=self._match(TokenType.INTO) and self._parse_table_part(), 3019 ) 3020 3021 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3022 if not self._match(TokenType.FORMAT): 3023 return None 3024 return self._parse_row_format() 3025 3026 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 3027 index = self._index 3028 with_ = with_ or self._match_text_seq("WITH") 3029 3030 if not self._match(TokenType.SERDE_PROPERTIES): 3031 self._retreat(index) 3032 return None 3033 return self.expression( 3034 exp.SerdeProperties, 3035 **{ # type: ignore 3036 "expressions": self._parse_wrapped_properties(), 3037 "with": with_, 3038 }, 3039 ) 3040 3041 def _parse_row_format( 3042 self, match_row: bool = False 3043 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3044 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3045 return None 3046 3047 if self._match_text_seq("SERDE"): 3048 this = self._parse_string() 3049 3050 serde_properties = self._parse_serde_properties() 3051 3052 return self.expression( 3053 exp.RowFormatSerdeProperty, this=this, 
serde_properties=serde_properties 3054 ) 3055 3056 self._match_text_seq("DELIMITED") 3057 3058 kwargs = {} 3059 3060 if self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3061 kwargs["fields"] = self._parse_string() 3062 if self._match_text_seq("ESCAPED", "BY"): 3063 kwargs["escaped"] = self._parse_string() 3064 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3065 kwargs["collection_items"] = self._parse_string() 3066 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3067 kwargs["map_keys"] = self._parse_string() 3068 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3069 kwargs["lines"] = self._parse_string() 3070 if self._match_text_seq("NULL", "DEFINED", "AS"): 3071 kwargs["null"] = self._parse_string() 3072 3073 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3074 3075 def _parse_load(self) -> exp.LoadData | exp.Command: 3076 if self._match_text_seq("DATA"): 3077 local = self._match_text_seq("LOCAL") 3078 self._match_text_seq("INPATH") 3079 inpath = self._parse_string() 3080 overwrite = self._match(TokenType.OVERWRITE) 3081 self._match_pair(TokenType.INTO, TokenType.TABLE) 3082 3083 return self.expression( 3084 exp.LoadData, 3085 this=self._parse_table(schema=True), 3086 local=local, 3087 overwrite=overwrite, 3088 inpath=inpath, 3089 partition=self._parse_partition(), 3090 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3091 serde=self._match_text_seq("SERDE") and self._parse_string(), 3092 ) 3093 return self._parse_as_command(self._prev) 3094 3095 def _parse_delete(self) -> exp.Delete: 3096 # This handles MySQL's "Multiple-Table Syntax" 3097 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3098 tables = None 3099 if not self._match(TokenType.FROM, advance=False): 3100 tables = self._parse_csv(self._parse_table) or None 3101 3102 returning = self._parse_returning() 3103 3104 return self.expression( 3105 exp.Delete, 3106 tables=tables, 3107 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3108 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3109 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3110 where=self._parse_where(), 3111 returning=returning or self._parse_returning(), 3112 limit=self._parse_limit(), 3113 ) 3114 3115 def _parse_update(self) -> exp.Update: 3116 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3117 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3118 returning = self._parse_returning() 3119 return self.expression( 3120 exp.Update, 3121 **{ # type: ignore 3122 "this": this, 3123 "expressions": expressions, 3124 "from": self._parse_from(joins=True), 3125 "where": self._parse_where(), 3126 "returning": returning or self._parse_returning(), 3127 "order": self._parse_order(), 3128 "limit": self._parse_limit(), 3129 }, 3130 ) 3131 3132 def _parse_use(self) -> exp.Use: 3133 return self.expression( 3134 exp.Use, 3135 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3136 this=self._parse_table(schema=False), 3137 ) 3138 3139 def _parse_uncache(self) -> exp.Uncache: 3140 if not self._match(TokenType.TABLE): 3141 self.raise_error("Expecting TABLE after UNCACHE") 3142 3143 return self.expression( 3144 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3145 ) 3146 3147 def _parse_cache(self) -> exp.Cache: 3148 lazy = self._match_text_seq("LAZY") 3149 self._match(TokenType.TABLE) 3150 table = 

    def _parse_update(self) -> exp.Update:
        this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS)
        expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality)
        returning = self._parse_returning()
        return self.expression(
            exp.Update,
            **{  # type: ignore
                "this": this,
                "expressions": expressions,
                "from": self._parse_from(joins=True),
                "where": self._parse_where(),
                "returning": returning or self._parse_returning(),
                "order": self._parse_order(),
                "limit": self._parse_limit(),
            },
        )

    def _parse_use(self) -> exp.Use:
        return self.expression(
            exp.Use,
            kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False),
            this=self._parse_table(schema=False),
        )

    def _parse_uncache(self) -> exp.Uncache:
        if not self._match(TokenType.TABLE):
            self.raise_error("Expecting TABLE after UNCACHE")

        return self.expression(
            exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True)
        )

    def _parse_cache(self) -> exp.Cache:
        lazy = self._match_text_seq("LAZY")
        self._match(TokenType.TABLE)
        table = self._parse_table(schema=True)

        options = []
        if self._match_text_seq("OPTIONS"):
            self._match_l_paren()
            k = self._parse_string()
            self._match(TokenType.EQ)
            v = self._parse_string()
            options = [k, v]
            self._match_r_paren()

        self._match(TokenType.ALIAS)
        return self.expression(
            exp.Cache,
            this=table,
            lazy=lazy,
            options=options,
            expression=self._parse_select(nested=True),
        )

    def _parse_partition(self) -> t.Optional[exp.Partition]:
        if not self._match_texts(self.PARTITION_KEYWORDS):
            return None

        return self.expression(
            exp.Partition,
            subpartition=self._prev.text.upper() == "SUBPARTITION",
            expressions=self._parse_wrapped_csv(self._parse_assignment),
        )

    def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
        def _parse_value_expression() -> t.Optional[exp.Expression]:
            if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
                return exp.var(self._prev.text.upper())
            return self._parse_expression()

        if self._match(TokenType.L_PAREN):
            expressions = self._parse_csv(_parse_value_expression)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=expressions)

        # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
        expression = self._parse_expression()
        if expression:
            return self.expression(exp.Tuple, expressions=[expression])
        return None

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query
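
    # Illustrative note (assumes a pipe-syntax dialect such as BigQuery; not part
    # of the parser): a trailing |> chain is folded back into an ordinary query.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
    #   # ast comes back as a regular exp.Select over t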

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports a leading FROM, e.g. FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and self._next.token_type != TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
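
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   ast = sqlglot.parse_one(
    #       "WITH c AS MATERIALIZED (SELECT 1) SELECT * FROM c", read="postgres"
    #   )
    #   # ast.find(exp.CTE).args["materialized"] is True; NOT MATERIALIZED yields
    #   # False, and a plain CTE leaves it as None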

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if it fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this
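
    # Illustrative note (BigQuery-style implicit UNNEST; not part of the parser):
    # given SELECT x FROM t, t.arr AS x, the comma-joined t.arr refers back to t,
    # so the rewrite above replaces the join table with UNNEST(t.arr), producing
    # the same tree as an explicit CROSS JOIN UNNEST(t.arr) AS x.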

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    modifier_token = self._curr
                    parser = self.QUERY_MODIFIER_PARSERS[modifier_token.token_type]
                    key, expression = parser(self)

                    if expression:
                        if this.args.get(key):
                            self.raise_error(
                                f"Found multiple '{modifier_token.text.upper()}' clauses",
                                token=modifier_token,
                            )

                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None
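
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # optimizer hints ride along as comments on the SELECT token and are re-parsed
    # into an exp.Hint, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT /*+ BROADCAST(y) */ * FROM x JOIN y", read="spark")
    #   # ast.args["hint"] is an exp.Hint wrapping the BROADCAST(y) call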
self._match_text_seq("TO", "NEXT", "ROW"): 3678 text += " TO NEXT ROW" 3679 elif self._match_text_seq("TO", "FIRST"): 3680 text += f" TO FIRST {self._advance_any().text}" # type: ignore 3681 elif self._match_text_seq("TO", "LAST"): 3682 text += f" TO LAST {self._advance_any().text}" # type: ignore 3683 after = exp.var(text) 3684 else: 3685 after = None 3686 3687 if self._match_text_seq("PATTERN"): 3688 self._match_l_paren() 3689 3690 if not self._curr: 3691 self.raise_error("Expecting )", self._curr) 3692 3693 paren = 1 3694 start = self._curr 3695 3696 while self._curr and paren > 0: 3697 if self._curr.token_type == TokenType.L_PAREN: 3698 paren += 1 3699 if self._curr.token_type == TokenType.R_PAREN: 3700 paren -= 1 3701 3702 end = self._prev 3703 self._advance() 3704 3705 if paren > 0: 3706 self.raise_error("Expecting )", self._curr) 3707 3708 pattern = exp.var(self._find_sql(start, end)) 3709 else: 3710 pattern = None 3711 3712 define = ( 3713 self._parse_csv(self._parse_name_as_expression) 3714 if self._match_text_seq("DEFINE") 3715 else None 3716 ) 3717 3718 self._match_r_paren() 3719 3720 return self.expression( 3721 exp.MatchRecognize, 3722 partition_by=partition, 3723 order=order, 3724 measures=measures, 3725 rows=rows, 3726 after=after, 3727 pattern=pattern, 3728 define=define, 3729 alias=self._parse_table_alias(), 3730 ) 3731 3732 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3733 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3734 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3735 cross_apply = False 3736 3737 if cross_apply is not None: 3738 this = self._parse_select(table=True) 3739 view = None 3740 outer = None 3741 elif self._match(TokenType.LATERAL): 3742 this = self._parse_select(table=True) 3743 view = self._match(TokenType.VIEW) 3744 outer = self._match(TokenType.OUTER) 3745 else: 3746 return None 3747 3748 if not this: 3749 this = ( 3750 self._parse_unnest() 3751 or self._parse_function() 3752 or self._parse_id_var(any_token=False) 3753 ) 3754 3755 while self._match(TokenType.DOT): 3756 this = exp.Dot( 3757 this=this, 3758 expression=self._parse_function() or self._parse_id_var(any_token=False), 3759 ) 3760 3761 ordinality: t.Optional[bool] = None 3762 3763 if view: 3764 table = self._parse_id_var(any_token=False) 3765 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3766 table_alias: t.Optional[exp.TableAlias] = self.expression( 3767 exp.TableAlias, this=table, columns=columns 3768 ) 3769 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3770 # We move the alias from the lateral's child node to the lateral itself 3771 table_alias = this.args["alias"].pop() 3772 else: 3773 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3774 table_alias = self._parse_table_alias() 3775 3776 return self.expression( 3777 exp.Lateral, 3778 this=this, 3779 view=view, 3780 outer=outer, 3781 alias=table_alias, 3782 cross_apply=cross_apply, 3783 ordinality=ordinality, 3784 ) 3785 3786 def _parse_join_parts( 3787 self, 3788 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3789 return ( 3790 self._match_set(self.JOIN_METHODS) and self._prev, 3791 self._match_set(self.JOIN_SIDES) and self._prev, 3792 self._match_set(self.JOIN_KINDS) and self._prev, 3793 ) 3794 3795 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3796 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3797 this = self._parse_column() 3798 if 

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not method
            and not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)
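
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # ON stores a full boolean expression, while USING stores bare identifiers.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   join = sqlglot.parse_one("SELECT * FROM a JOIN b USING (id)").find(exp.Join)
    #   # join.args["using"] is a list holding the identifier id; there is no "on" arg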

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
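
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # dotted names fill catalog/db/this from right to left, and T-SQL's empty
    # middle part survives as an empty identifier.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   tbl = sqlglot.parse_one("SELECT * FROM a..b", read="tsql").find(exp.Table)
    #   # tbl.catalog == "a", tbl.db == "", tbl.name == "b"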

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )
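
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # Snowflake time-travel clauses attach to the table under the "when" arg.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sql = "SELECT * FROM t AT (TIMESTAMP => '2024-01-01'::TIMESTAMP)"
    #   tbl = sqlglot.parse_one(sql, read="snowflake").find(exp.Table)
    #   # tbl.args["when"] is an exp.HistoricalData with this="AT", kind="TIMESTAMP"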

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )
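
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # a single exp.TableSample node covers percent, row-count, and Hive bucket
    # sampling.
    #
    #   import sqlglot
    #   from sqlglot import exp
    #
    #   sample = sqlglot.parse_one(
    #       "SELECT * FROM t TABLESAMPLE (10 PERCENT)", read="tsql"
    #   ).find(exp.TableSample)
    #   # sample.args["percent"] holds the literal 10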

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )
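
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # DuckDB's statement-level PIVOT has no parenthesized body, hence this
    # "simplified" variant.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("PIVOT cities ON year USING sum(population)", read="duckdb")
    #   # ast is an exp.Pivot with this=cities, expressions=[year], using=[SUM(population)]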

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this
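
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # MySQL's "LIMIT offset, count" lands here with the count in "expression";
    # the offset is later rehomed by _parse_query_modifiers.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    #   # ast.args["limit"].expression is 10 and ast.args["offset"] wraps 5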

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
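
    # Illustrative sketch (assumes the public sqlglot API; not part of the parser):
    # when neither DISTINCT nor ALL is written, the dialect's
    # SET_OP_DISTINCT_BY_DEFAULT supplies the answer; BY NAME is captured as well.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x", read="duckdb")
    #   # ast is an exp.Union with distinct=False and by_name=True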

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)
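
    # Note (illustrative): the methods above form a classic recursive-descent
    # precedence ladder; each level parses the next-tighter level and then folds
    # same-level operators left to right. For example, "a OR b AND c = 1" parses
    # as a OR (b AND (c = 1)) because disjunction binds more loosely than
    # conjunction, which in turn binds more loosely than equality.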
4972 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4973 if self._match(TokenType.NOTNULL): 4974 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4975 this = self.expression(exp.Not, this=this) 4976 4977 if negate: 4978 this = self._negate_range(this) 4979 4980 if self._match(TokenType.IS): 4981 this = self._parse_is(this) 4982 4983 return this 4984 4985 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4986 if not this: 4987 return this 4988 4989 return self.expression(exp.Not, this=this) 4990 4991 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4992 index = self._index - 1 4993 negate = self._match(TokenType.NOT) 4994 4995 if self._match_text_seq("DISTINCT", "FROM"): 4996 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4997 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4998 4999 if self._match(TokenType.JSON): 5000 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 5001 5002 if self._match_text_seq("WITH"): 5003 _with = True 5004 elif self._match_text_seq("WITHOUT"): 5005 _with = False 5006 else: 5007 _with = None 5008 5009 unique = self._match(TokenType.UNIQUE) 5010 self._match_text_seq("KEYS") 5011 expression: t.Optional[exp.Expression] = self.expression( 5012 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 5013 ) 5014 else: 5015 expression = self._parse_primary() or self._parse_null() 5016 if not expression: 5017 self._retreat(index) 5018 return None 5019 5020 this = self.expression(exp.Is, this=this, expression=expression) 5021 return self.expression(exp.Not, this=this) if negate else this 5022 5023 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 5024 unnest = self._parse_unnest(with_alias=False) 5025 if unnest: 5026 this = self.expression(exp.In, this=this, unnest=unnest) 5027 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 5028 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 5029 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 5030 5031 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 5032 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 5033 else: 5034 this = self.expression(exp.In, this=this, expressions=expressions) 5035 5036 if matched_l_paren: 5037 self._match_r_paren(this) 5038 elif not self._match(TokenType.R_BRACKET, expression=this): 5039 self.raise_error("Expecting ]") 5040 else: 5041 this = self.expression(exp.In, this=this, field=self._parse_column()) 5042 5043 return this 5044 5045 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5046 symmetric = None 5047 if self._match_text_seq("SYMMETRIC"): 5048 symmetric = True 5049 elif self._match_text_seq("ASYMMETRIC"): 5050 symmetric = False 5051 5052 low = self._parse_bitwise() 5053 self._match(TokenType.AND) 5054 high = self._parse_bitwise() 5055 5056 return self.expression( 5057 exp.Between, 5058 this=this, 5059 low=low, 5060 high=high, 5061 symmetric=symmetric, 5062 ) 5063 5064 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5065 if not self._match(TokenType.ESCAPE): 5066 return this 5067 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5068 5069 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5070 index = self._index 5071 5072 if not 
self._match(TokenType.INTERVAL) and match_interval: 5073 return None 5074 5075 if self._match(TokenType.STRING, advance=False): 5076 this = self._parse_primary() 5077 else: 5078 this = self._parse_term() 5079 5080 if not this or ( 5081 isinstance(this, exp.Column) 5082 and not this.table 5083 and not this.this.quoted 5084 and this.name.upper() == "IS" 5085 ): 5086 self._retreat(index) 5087 return None 5088 5089 unit = self._parse_function() or ( 5090 not self._match(TokenType.ALIAS, advance=False) 5091 and self._parse_var(any_token=True, upper=True) 5092 ) 5093 5094 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5095 # each INTERVAL expression into this canonical form so it's easy to transpile 5096 if this and this.is_number: 5097 this = exp.Literal.string(this.to_py()) 5098 elif this and this.is_string: 5099 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5100 if parts and unit: 5101 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5102 unit = None 5103 self._retreat(self._index - 1) 5104 5105 if len(parts) == 1: 5106 this = exp.Literal.string(parts[0][0]) 5107 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5108 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5109 unit = self.expression( 5110 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5111 ) 5112 5113 interval = self.expression(exp.Interval, this=this, unit=unit) 5114 5115 index = self._index 5116 self._match(TokenType.PLUS) 5117 5118 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5119 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5120 return self.expression( 5121 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5122 ) 5123 5124 self._retreat(index) 5125 return interval 5126 5127 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5128 this = self._parse_term() 5129 5130 while True: 5131 if self._match_set(self.BITWISE): 5132 this = self.expression( 5133 self.BITWISE[self._prev.token_type], 5134 this=this, 5135 expression=self._parse_term(), 5136 ) 5137 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5138 this = self.expression( 5139 exp.DPipe, 5140 this=this, 5141 expression=self._parse_term(), 5142 safe=not self.dialect.STRICT_STRING_CONCAT, 5143 ) 5144 elif self._match(TokenType.DQMARK): 5145 this = self.expression( 5146 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5147 ) 5148 elif self._match_pair(TokenType.LT, TokenType.LT): 5149 this = self.expression( 5150 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5151 ) 5152 elif self._match_pair(TokenType.GT, TokenType.GT): 5153 this = self.expression( 5154 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5155 ) 5156 else: 5157 break 5158 5159 return this 5160 5161 def _parse_term(self) -> t.Optional[exp.Expression]: 5162 this = self._parse_factor() 5163 5164 while self._match_set(self.TERM): 5165 klass = self.TERM[self._prev.token_type] 5166 comments = self._prev_comments 5167 expression = self._parse_factor() 5168 5169 this = self.expression(klass, this=this, comments=comments, expression=expression) 5170 5171 if isinstance(this, exp.Collate): 5172 expr = this.expression 5173 5174 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5175 # fallback to Identifier / Var 5176 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5177 ident = expr.this 5178 if 
isinstance(ident, exp.Identifier): 5179 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5180 5181 return this 5182 5183 def _parse_factor(self) -> t.Optional[exp.Expression]: 5184 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5185 this = parse_method() 5186 5187 while self._match_set(self.FACTOR): 5188 klass = self.FACTOR[self._prev.token_type] 5189 comments = self._prev_comments 5190 expression = parse_method() 5191 5192 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5193 self._retreat(self._index - 1) 5194 return this 5195 5196 this = self.expression(klass, this=this, comments=comments, expression=expression) 5197 5198 if isinstance(this, exp.Div): 5199 this.args["typed"] = self.dialect.TYPED_DIVISION 5200 this.args["safe"] = self.dialect.SAFE_DIVISION 5201 5202 return this 5203 5204 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5205 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5206 5207 def _parse_unary(self) -> t.Optional[exp.Expression]: 5208 if self._match_set(self.UNARY_PARSERS): 5209 return self.UNARY_PARSERS[self._prev.token_type](self) 5210 return self._parse_at_time_zone(self._parse_type()) 5211 5212 def _parse_type( 5213 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5214 ) -> t.Optional[exp.Expression]: 5215 interval = parse_interval and self._parse_interval() 5216 if interval: 5217 return interval 5218 5219 index = self._index 5220 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5221 5222 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5223 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5224 if isinstance(data_type, exp.Cast): 5225 # This constructor can contain ops directly after it, for instance struct unnesting: 5226 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5227 return self._parse_column_ops(data_type) 5228 5229 if data_type: 5230 index2 = self._index 5231 this = self._parse_primary() 5232 5233 if isinstance(this, exp.Literal): 5234 literal = this.name 5235 this = self._parse_column_ops(this) 5236 5237 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5238 if parser: 5239 return parser(self, this, data_type) 5240 5241 if ( 5242 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5243 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5244 and TIME_ZONE_RE.search(literal) 5245 ): 5246 data_type = exp.DataType.build("TIMESTAMPTZ") 5247 5248 return self.expression(exp.Cast, this=this, to=data_type) 5249 5250 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5251 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5252 # 5253 # If the index difference here is greater than 1, that means the parser itself must have 5254 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5255 # 5256 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5257 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5258 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5259 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5260 # 5261 # In these cases, we don't really want to return the converted type, but instead retreat 5262 # and try to parse a Column or Identifier in the section below. 
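# (Concretely: for the input DECIMAL(38, 0) we consume six tokens, so
# index2 - index > 1 and the expressions were really present in the SQL text,
# whereas for a bare DECIMAL only the type keyword is consumed, so
# index2 - index == 1 even though a TYPE_CONVERTERS callable filled in the (38, 0).)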
5263 if data_type.expressions and index2 - index > 1: 5264 self._retreat(index2) 5265 return self._parse_column_ops(data_type) 5266 5267 self._retreat(index) 5268 5269 if fallback_to_identifier: 5270 return self._parse_id_var() 5271 5272 this = self._parse_column() 5273 return this and self._parse_column_ops(this) 5274 5275 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5276 this = self._parse_type() 5277 if not this: 5278 return None 5279 5280 if isinstance(this, exp.Column) and not this.table: 5281 this = exp.var(this.name.upper()) 5282 5283 return self.expression( 5284 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5285 ) 5286 5287 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5288 type_name = identifier.name 5289 5290 while self._match(TokenType.DOT): 5291 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5292 5293 return exp.DataType.build(type_name, dialect=self.dialect, udt=True) 5294 5295 def _parse_types( 5296 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5297 ) -> t.Optional[exp.Expression]: 5298 index = self._index 5299 5300 this: t.Optional[exp.Expression] = None 5301 prefix = self._match_text_seq("SYSUDTLIB", ".") 5302 5303 if self._match_set(self.TYPE_TOKENS): 5304 type_token = self._prev.token_type 5305 else: 5306 type_token = None 5307 identifier = allow_identifiers and self._parse_id_var( 5308 any_token=False, tokens=(TokenType.VAR,) 5309 ) 5310 if isinstance(identifier, exp.Identifier): 5311 try: 5312 tokens = self.dialect.tokenize(identifier.name) 5313 except TokenError: 5314 tokens = None 5315 5316 if tokens and len(tokens) == 1 and tokens[0].token_type in self.TYPE_TOKENS: 5317 type_token = tokens[0].token_type 5318 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5319 this = self._parse_user_defined_type(identifier) 5320 else: 5321 self._retreat(self._index - 1) 5322 return None 5323 else: 5324 return None 5325 5326 if type_token == TokenType.PSEUDO_TYPE: 5327 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5328 5329 if type_token == TokenType.OBJECT_IDENTIFIER: 5330 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5331 5332 # https://materialize.com/docs/sql/types/map/ 5333 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5334 key_type = self._parse_types( 5335 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5336 ) 5337 if not self._match(TokenType.FARROW): 5338 self._retreat(index) 5339 return None 5340 5341 value_type = self._parse_types( 5342 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5343 ) 5344 if not self._match(TokenType.R_BRACKET): 5345 self._retreat(index) 5346 return None 5347 5348 return exp.DataType( 5349 this=exp.DataType.Type.MAP, 5350 expressions=[key_type, value_type], 5351 nested=True, 5352 prefix=prefix, 5353 ) 5354 5355 nested = type_token in self.NESTED_TYPE_TOKENS 5356 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5357 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5358 expressions = None 5359 maybe_func = False 5360 5361 if self._match(TokenType.L_PAREN): 5362 if is_struct: 5363 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5364 elif nested: 5365 expressions = self._parse_csv( 5366 lambda: self._parse_types( 5367 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5368 ) 5369 ) 5370 if type_token == 
TokenType.NULLABLE and len(expressions) == 1: 5371 this = expressions[0] 5372 this.set("nullable", True) 5373 self._match_r_paren() 5374 return this 5375 elif type_token in self.ENUM_TYPE_TOKENS: 5376 expressions = self._parse_csv(self._parse_equality) 5377 elif is_aggregate: 5378 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5379 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5380 ) 5381 if not func_or_ident: 5382 return None 5383 expressions = [func_or_ident] 5384 if self._match(TokenType.COMMA): 5385 expressions.extend( 5386 self._parse_csv( 5387 lambda: self._parse_types( 5388 check_func=check_func, 5389 schema=schema, 5390 allow_identifiers=allow_identifiers, 5391 ) 5392 ) 5393 ) 5394 else: 5395 expressions = self._parse_csv(self._parse_type_size) 5396 5397 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5398 if type_token == TokenType.VECTOR and len(expressions) == 2: 5399 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5400 5401 if not expressions or not self._match(TokenType.R_PAREN): 5402 self._retreat(index) 5403 return None 5404 5405 maybe_func = True 5406 5407 values: t.Optional[t.List[exp.Expression]] = None 5408 5409 if nested and self._match(TokenType.LT): 5410 if is_struct: 5411 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5412 else: 5413 expressions = self._parse_csv( 5414 lambda: self._parse_types( 5415 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5416 ) 5417 ) 5418 5419 if not self._match(TokenType.GT): 5420 self.raise_error("Expecting >") 5421 5422 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5423 values = self._parse_csv(self._parse_assignment) 5424 if not values and is_struct: 5425 values = None 5426 self._retreat(self._index - 1) 5427 else: 5428 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5429 5430 if type_token in self.TIMESTAMPS: 5431 if self._match_text_seq("WITH", "TIME", "ZONE"): 5432 maybe_func = False 5433 tz_type = ( 5434 exp.DataType.Type.TIMETZ 5435 if type_token in self.TIMES 5436 else exp.DataType.Type.TIMESTAMPTZ 5437 ) 5438 this = exp.DataType(this=tz_type, expressions=expressions) 5439 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5440 maybe_func = False 5441 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5442 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5443 maybe_func = False 5444 elif type_token == TokenType.INTERVAL: 5445 unit = self._parse_var(upper=True) 5446 if unit: 5447 if self._match_text_seq("TO"): 5448 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5449 5450 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5451 else: 5452 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5453 elif type_token == TokenType.VOID: 5454 this = exp.DataType(this=exp.DataType.Type.NULL) 5455 5456 if maybe_func and check_func: 5457 index2 = self._index 5458 peek = self._parse_string() 5459 5460 if not peek: 5461 self._retreat(index) 5462 return None 5463 5464 self._retreat(index2) 5465 5466 if not this: 5467 if self._match_text_seq("UNSIGNED"): 5468 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5469 if not unsigned_type_token: 5470 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5471 5472 type_token = unsigned_type_token or type_token 5473 5474 this = exp.DataType( 5475 
this=exp.DataType.Type[type_token.value], 5476 expressions=expressions, 5477 nested=nested, 5478 prefix=prefix, 5479 ) 5480 5481 # Empty arrays/structs are allowed 5482 if values is not None: 5483 cls = exp.Struct if is_struct else exp.Array 5484 this = exp.cast(cls(expressions=values), this, copy=False) 5485 5486 elif expressions: 5487 this.set("expressions", expressions) 5488 5489 # https://materialize.com/docs/sql/types/list/#type-name 5490 while self._match(TokenType.LIST): 5491 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5492 5493 index = self._index 5494 5495 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5496 matched_array = self._match(TokenType.ARRAY) 5497 5498 while self._curr: 5499 datatype_token = self._prev.token_type 5500 matched_l_bracket = self._match(TokenType.L_BRACKET) 5501 5502 if (not matched_l_bracket and not matched_array) or ( 5503 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5504 ): 5505 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5506 # not to be confused with the fixed size array parsing 5507 break 5508 5509 matched_array = False 5510 values = self._parse_csv(self._parse_assignment) or None 5511 if ( 5512 values 5513 and not schema 5514 and ( 5515 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5516 ) 5517 ): 5518 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5519 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5520 self._retreat(index) 5521 break 5522 5523 this = exp.DataType( 5524 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5525 ) 5526 self._match(TokenType.R_BRACKET) 5527 5528 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5529 converter = self.TYPE_CONVERTERS.get(this.this) 5530 if converter: 5531 this = converter(t.cast(exp.DataType, this)) 5532 5533 return this 5534 5535 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5536 index = self._index 5537 5538 if ( 5539 self._curr 5540 and self._next 5541 and self._curr.token_type in self.TYPE_TOKENS 5542 and self._next.token_type in self.TYPE_TOKENS 5543 ): 5544 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5545 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5546 this = self._parse_id_var() 5547 else: 5548 this = ( 5549 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5550 or self._parse_id_var() 5551 ) 5552 5553 self._match(TokenType.COLON) 5554 5555 if ( 5556 type_required 5557 and not isinstance(this, exp.DataType) 5558 and not self._match_set(self.TYPE_TOKENS, advance=False) 5559 ): 5560 self._retreat(index) 5561 return self._parse_types() 5562 5563 return self._parse_column_def(this) 5564 5565 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5566 if not self._match_text_seq("AT", "TIME", "ZONE"): 5567 return this 5568 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5569 5570 def _parse_column(self) -> t.Optional[exp.Expression]: 5571 this = self._parse_column_reference() 5572 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5573 5574 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5575 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5576 5577 return column 5578 5579 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5580 this = self._parse_field() 5581 if ( 5582 not this 5583 and self._match(TokenType.VALUES, advance=False) 5584 and self.VALUES_FOLLOWED_BY_PAREN 5585 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5586 ): 5587 this = self._parse_id_var() 5588 5589 if isinstance(this, exp.Identifier): 5590 # We bubble up comments from the Identifier to the Column 5591 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5592 5593 return this 5594 5595 def _parse_colon_as_variant_extract( 5596 self, this: t.Optional[exp.Expression] 5597 ) -> t.Optional[exp.Expression]: 5598 casts = [] 5599 json_path = [] 5600 escape = None 5601 5602 while self._match(TokenType.COLON): 5603 start_index = self._index 5604 5605 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5606 path = self._parse_column_ops( 5607 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5608 ) 5609 5610 # The cast :: operator has a lower precedence than the extraction operator :, so 5611 # we rearrange the AST appropriately to avoid casting the JSON path 5612 while isinstance(path, exp.Cast): 5613 casts.append(path.to) 5614 path = path.this 5615 5616 if casts: 5617 dcolon_offset = next( 5618 i 5619 for i, t in enumerate(self._tokens[start_index:]) 5620 if t.token_type == TokenType.DCOLON 5621 ) 5622 end_token = self._tokens[start_index + dcolon_offset - 1] 5623 else: 5624 end_token = self._prev 5625 5626 if path: 5627 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5628 # it'll roundtrip to a string literal in GET_PATH 5629 if isinstance(path, exp.Identifier) and path.quoted: 5630 escape = True 5631 5632 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5633 5634 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5635 # Databricks transforms it back to the colon/dot notation 5636 if json_path: 5637 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5638 5639 if json_path_expr: 5640 json_path_expr.set("escape", escape) 5641 5642 this = self.expression( 5643 exp.JSONExtract, 5644 this=this, 5645 expression=json_path_expr, 5646 variant_extract=True, 5647 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5648 ) 5649 5650 while casts: 5651 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5652 5653 return this 5654 5655 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5656 return self._parse_types() 5657 5658 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5659 this = self._parse_bracket(this) 5660 5661 while self._match_set(self.COLUMN_OPERATORS): 5662 op_token = self._prev.token_type 5663 op = self.COLUMN_OPERATORS.get(op_token) 5664 5665 if op_token in self.CAST_COLUMN_OPERATORS: 5666 field = self._parse_dcolon() 5667 if not field: 5668 self.raise_error("Expected type") 5669 elif op and self._curr: 5670 field = self._parse_column_reference() or self._parse_bracket() 5671 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5672 field = self._parse_column_ops(field) 5673 else: 5674 field = self._parse_field(any_token=True, anonymous_func=True) 5675 5676 # Function calls can be qualified, e.g., x.y.FOO() 5677 # This converts the final AST to a series of Dots leading to the function call 5678 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5679 if isinstance(field, (exp.Func, exp.Window)) and this: 5680 this = this.transform( 5681 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5682 ) 5683 5684 if op: 5685 this = op(self, this, field) 5686 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5687 this = self.expression( 5688 exp.Column, 5689 comments=this.comments, 5690 this=field, 5691 table=this.this, 5692 db=this.args.get("table"), 5693 catalog=this.args.get("db"), 5694 ) 5695 elif isinstance(field, exp.Window): 5696 # Move the exp.Dot's to the window's function 5697 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5698 field.set("this", window_func) 5699 this = field 5700 else: 5701 this = self.expression(exp.Dot, this=this, expression=field) 5702 5703 if field and field.comments: 5704 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5705 5706 this = self._parse_bracket(this) 5707 5708 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5709 5710 def _parse_paren(self) -> t.Optional[exp.Expression]: 5711 if not self._match(TokenType.L_PAREN): 5712 return None 5713 5714 comments = self._prev_comments 5715 query = self._parse_select() 5716 5717 if query: 5718 expressions = [query] 5719 else: 5720 expressions = self._parse_expressions() 5721 5722 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5723 5724 if not this and self._match(TokenType.R_PAREN, advance=False): 5725 this = self.expression(exp.Tuple) 5726 elif isinstance(this, 
exp.UNWRAPPED_QUERIES): 5727 this = self._parse_subquery(this=this, parse_alias=False) 5728 elif isinstance(this, exp.Subquery): 5729 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5730 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5731 this = self.expression(exp.Tuple, expressions=expressions) 5732 else: 5733 this = self.expression(exp.Paren, this=this) 5734 5735 if this: 5736 this.add_comments(comments) 5737 5738 self._match_r_paren(expression=this) 5739 return this 5740 5741 def _parse_primary(self) -> t.Optional[exp.Expression]: 5742 if self._match_set(self.PRIMARY_PARSERS): 5743 token_type = self._prev.token_type 5744 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5745 5746 if token_type == TokenType.STRING: 5747 expressions = [primary] 5748 while self._match(TokenType.STRING): 5749 expressions.append(exp.Literal.string(self._prev.text)) 5750 5751 if len(expressions) > 1: 5752 return self.expression(exp.Concat, expressions=expressions) 5753 5754 return primary 5755 5756 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5757 return exp.Literal.number(f"0.{self._prev.text}") 5758 5759 return self._parse_paren() 5760 5761 def _parse_field( 5762 self, 5763 any_token: bool = False, 5764 tokens: t.Optional[t.Collection[TokenType]] = None, 5765 anonymous_func: bool = False, 5766 ) -> t.Optional[exp.Expression]: 5767 if anonymous_func: 5768 field = ( 5769 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5770 or self._parse_primary() 5771 ) 5772 else: 5773 field = self._parse_primary() or self._parse_function( 5774 anonymous=anonymous_func, any_token=any_token 5775 ) 5776 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5777 5778 def _parse_function( 5779 self, 5780 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5781 anonymous: bool = False, 5782 optional_parens: bool = True, 5783 any_token: bool = False, 5784 ) -> t.Optional[exp.Expression]: 5785 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5786 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5787 fn_syntax = False 5788 if ( 5789 self._match(TokenType.L_BRACE, advance=False) 5790 and self._next 5791 and self._next.text.upper() == "FN" 5792 ): 5793 self._advance(2) 5794 fn_syntax = True 5795 5796 func = self._parse_function_call( 5797 functions=functions, 5798 anonymous=anonymous, 5799 optional_parens=optional_parens, 5800 any_token=any_token, 5801 ) 5802 5803 if fn_syntax: 5804 self._match(TokenType.R_BRACE) 5805 5806 return func 5807 5808 def _parse_function_call( 5809 self, 5810 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5811 anonymous: bool = False, 5812 optional_parens: bool = True, 5813 any_token: bool = False, 5814 ) -> t.Optional[exp.Expression]: 5815 if not self._curr: 5816 return None 5817 5818 comments = self._curr.comments 5819 prev = self._prev 5820 token = self._curr 5821 token_type = self._curr.token_type 5822 this = self._curr.text 5823 upper = this.upper() 5824 5825 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5826 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5827 self._advance() 5828 return self._parse_window(parser(self)) 5829 5830 if not self._next or self._next.token_type != TokenType.L_PAREN: 5831 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5832 self._advance() 5833 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5834 5835 return None 5836 5837 if 
any_token: 5838 if token_type in self.RESERVED_TOKENS: 5839 return None 5840 elif token_type not in self.FUNC_TOKENS: 5841 return None 5842 5843 self._advance(2) 5844 5845 parser = self.FUNCTION_PARSERS.get(upper) 5846 if parser and not anonymous: 5847 this = parser(self) 5848 else: 5849 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5850 5851 if subquery_predicate: 5852 expr = None 5853 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5854 expr = self._parse_select() 5855 self._match_r_paren() 5856 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5857 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5858 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5859 self._advance(-1) 5860 expr = self._parse_bitwise() 5861 5862 if expr: 5863 return self.expression(subquery_predicate, comments=comments, this=expr) 5864 5865 if functions is None: 5866 functions = self.FUNCTIONS 5867 5868 function = functions.get(upper) 5869 known_function = function and not anonymous 5870 5871 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5872 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5873 5874 post_func_comments = self._curr and self._curr.comments 5875 if known_function and post_func_comments: 5876 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5877 # call we'll construct it as exp.Anonymous, even if it's "known" 5878 if any( 5879 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5880 for comment in post_func_comments 5881 ): 5882 known_function = False 5883 5884 if alias and known_function: 5885 args = self._kv_to_prop_eq(args) 5886 5887 if known_function: 5888 func_builder = t.cast(t.Callable, function) 5889 5890 if "dialect" in func_builder.__code__.co_varnames: 5891 func = func_builder(args, dialect=self.dialect) 5892 else: 5893 func = func_builder(args) 5894 5895 func = self.validate_expression(func, args) 5896 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5897 func.meta["name"] = this 5898 5899 this = func 5900 else: 5901 if token_type == TokenType.IDENTIFIER: 5902 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5903 5904 this = self.expression(exp.Anonymous, this=this, expressions=args) 5905 this = this.update_positions(token) 5906 5907 if isinstance(this, exp.Expression): 5908 this.add_comments(comments) 5909 5910 self._match_r_paren(this) 5911 return self._parse_window(this) 5912 5913 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5914 return expression 5915 5916 def _kv_to_prop_eq( 5917 self, expressions: t.List[exp.Expression], parse_map: bool = False 5918 ) -> t.List[exp.Expression]: 5919 transformed = [] 5920 5921 for index, e in enumerate(expressions): 5922 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5923 if isinstance(e, exp.Alias): 5924 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5925 5926 if not isinstance(e, exp.PropertyEQ): 5927 e = self.expression( 5928 exp.PropertyEQ, 5929 this=e.this if parse_map else exp.to_identifier(e.this.name), 5930 expression=e.expression, 5931 ) 5932 5933 if isinstance(e.this, exp.Column): 5934 e.this.replace(e.this.this) 5935 else: 5936 e = self._to_prop_eq(e, index) 5937 5938 transformed.append(e) 5939 5940 return transformed 5941 5942 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5943 return self._parse_statement() 5944 5945 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5946 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5947 5948 def _parse_user_defined_function( 5949 self, kind: t.Optional[TokenType] = None 5950 ) -> t.Optional[exp.Expression]: 5951 this = self._parse_table_parts(schema=True) 5952 5953 if not self._match(TokenType.L_PAREN): 5954 return this 5955 5956 expressions = self._parse_csv(self._parse_function_parameter) 5957 self._match_r_paren() 5958 return self.expression( 5959 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5960 ) 5961 5962 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5963 literal = self._parse_primary() 5964 if literal: 5965 return self.expression(exp.Introducer, this=token.text, expression=literal) 5966 5967 return self._identifier_expression(token) 5968 5969 def _parse_session_parameter(self) -> exp.SessionParameter: 5970 kind = None 5971 this = self._parse_id_var() or self._parse_primary() 5972 5973 if this and self._match(TokenType.DOT): 5974 kind = this.name 5975 this = self._parse_var() or self._parse_primary() 5976 5977 return self.expression(exp.SessionParameter, this=this, kind=kind) 5978 5979 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5980 return self._parse_id_var() 5981 5982 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5983 index = self._index 5984 5985 if self._match(TokenType.L_PAREN): 5986 expressions = t.cast( 5987 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5988 ) 5989 5990 if not self._match(TokenType.R_PAREN): 5991 self._retreat(index) 5992 else: 5993 expressions = [self._parse_lambda_arg()] 5994 5995 if self._match_set(self.LAMBDAS): 5996 return self.LAMBDAS[self._prev.token_type](self, expressions) 5997 5998 self._retreat(index) 5999 6000 this: t.Optional[exp.Expression] 6001 6002 if self._match(TokenType.DISTINCT): 6003 this = self.expression( 6004 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 6005 ) 6006 else: 6007 this = self._parse_select_or_expression(alias=alias) 6008 6009 return self._parse_limit( 6010 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 6011 ) 6012 6013 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6014 index = self._index 6015 if not self._match(TokenType.L_PAREN): 6016 return this 6017 6018 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 6019 # expr can be of both types 6020 if self._match_set(self.SELECT_START_TOKENS): 6021 self._retreat(index) 6022 return this 6023 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 6024 self._match_r_paren() 6025 return self.expression(exp.Schema, this=this, expressions=args) 6026 6027 def _parse_field_def(self) -> t.Optional[exp.Expression]: 6028 return self._parse_column_def(self._parse_field(any_token=True)) 6029 6030 def _parse_column_def( 6031 self, this: t.Optional[exp.Expression], computed_column: bool = True 6032 ) -> t.Optional[exp.Expression]: 6033 # column defs are not really columns, they're identifiers 6034 if isinstance(this, exp.Column): 6035 this = this.this 6036 6037 if not computed_column: 6038 self._match(TokenType.ALIAS) 6039 6040 kind = self._parse_types(schema=True) 6041 6042 if self._match_text_seq("FOR", "ORDINALITY"): 6043 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6044 6045 constraints: t.List[exp.Expression] = [] 6046 6047 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6048 ("ALIAS", "MATERIALIZED") 6049 ): 6050 persisted = self._prev.text.upper() == "MATERIALIZED" 6051 constraint_kind = exp.ComputedColumnConstraint( 6052 this=self._parse_assignment(), 6053 persisted=persisted or self._match_text_seq("PERSISTED"), 6054 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6055 ) 6056 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6057 elif ( 6058 kind 6059 and self._match(TokenType.ALIAS, advance=False) 6060 and ( 6061 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6062 or (self._next and self._next.token_type == TokenType.L_PAREN) 6063 ) 6064 ): 6065 self._advance() 6066 constraints.append( 6067 self.expression( 6068 exp.ColumnConstraint, 6069 kind=exp.ComputedColumnConstraint( 6070 this=self._parse_disjunction(), 6071 persisted=self._match_texts(("STORED", "VIRTUAL")) 6072 and self._prev.text.upper() == "STORED", 6073 ), 6074 ) 6075 ) 6076 6077 while True: 6078 constraint = self._parse_column_constraint() 6079 if not constraint: 6080 break 6081 constraints.append(constraint) 6082 6083 if not kind and not constraints: 6084 return this 6085 6086 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6087 6088 def _parse_auto_increment( 6089 self, 6090 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6091 start = None 6092 increment = None 6093 order = None 6094 6095 if self._match(TokenType.L_PAREN, advance=False): 6096 args = self._parse_wrapped_csv(self._parse_bitwise) 6097 start = seq_get(args, 0) 6098 increment = seq_get(args, 1) 6099 elif self._match_text_seq("START"): 6100 start = self._parse_bitwise() 6101 self._match_text_seq("INCREMENT") 6102 increment = self._parse_bitwise() 6103 if self._match_text_seq("ORDER"): 6104 order = True 6105 elif self._match_text_seq("NOORDER"): 6106 order = False 6107 6108 if start and increment: 6109 return exp.GeneratedAsIdentityColumnConstraint( 6110 start=start, increment=increment, this=False, order=order 6111 ) 6112 6113 return exp.AutoIncrementColumnConstraint() 6114 6115 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6116 if not self._match_text_seq("REFRESH"): 6117 self._retreat(self._index - 1) 6118 return None 6119 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6120 6121 def _parse_compress(self) -> exp.CompressColumnConstraint: 6122 if 
self._match(TokenType.L_PAREN, advance=False): 6123 return self.expression( 6124 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6125 ) 6126 6127 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6128 6129 def _parse_generated_as_identity( 6130 self, 6131 ) -> ( 6132 exp.GeneratedAsIdentityColumnConstraint 6133 | exp.ComputedColumnConstraint 6134 | exp.GeneratedAsRowColumnConstraint 6135 ): 6136 if self._match_text_seq("BY", "DEFAULT"): 6137 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6138 this = self.expression( 6139 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6140 ) 6141 else: 6142 self._match_text_seq("ALWAYS") 6143 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6144 6145 self._match(TokenType.ALIAS) 6146 6147 if self._match_text_seq("ROW"): 6148 start = self._match_text_seq("START") 6149 if not start: 6150 self._match(TokenType.END) 6151 hidden = self._match_text_seq("HIDDEN") 6152 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6153 6154 identity = self._match_text_seq("IDENTITY") 6155 6156 if self._match(TokenType.L_PAREN): 6157 if self._match(TokenType.START_WITH): 6158 this.set("start", self._parse_bitwise()) 6159 if self._match_text_seq("INCREMENT", "BY"): 6160 this.set("increment", self._parse_bitwise()) 6161 if self._match_text_seq("MINVALUE"): 6162 this.set("minvalue", self._parse_bitwise()) 6163 if self._match_text_seq("MAXVALUE"): 6164 this.set("maxvalue", self._parse_bitwise()) 6165 6166 if self._match_text_seq("CYCLE"): 6167 this.set("cycle", True) 6168 elif self._match_text_seq("NO", "CYCLE"): 6169 this.set("cycle", False) 6170 6171 if not identity: 6172 this.set("expression", self._parse_range()) 6173 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6174 args = self._parse_csv(self._parse_bitwise) 6175 this.set("start", seq_get(args, 0)) 6176 this.set("increment", seq_get(args, 1)) 6177 6178 self._match_r_paren() 6179 6180 return this 6181 6182 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6183 self._match_text_seq("LENGTH") 6184 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6185 6186 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6187 if self._match_text_seq("NULL"): 6188 return self.expression(exp.NotNullColumnConstraint) 6189 if self._match_text_seq("CASESPECIFIC"): 6190 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6191 if self._match_text_seq("FOR", "REPLICATION"): 6192 return self.expression(exp.NotForReplicationColumnConstraint) 6193 6194 # Unconsume the `NOT` token 6195 self._retreat(self._index - 1) 6196 return None 6197 6198 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6199 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6200 6201 procedure_option_follows = ( 6202 self._match(TokenType.WITH, advance=False) 6203 and self._next 6204 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6205 ) 6206 6207 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6208 return self.expression( 6209 exp.ColumnConstraint, 6210 this=this, 6211 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6212 ) 6213 6214 return this 6215 6216 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6217 if not self._match(TokenType.CONSTRAINT): 6218 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6219 6220 return self.expression( 6221 exp.Constraint, 6222 this=self._parse_id_var(), 6223 expressions=self._parse_unnamed_constraints(), 6224 ) 6225 6226 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6227 constraints = [] 6228 while True: 6229 constraint = self._parse_unnamed_constraint() or self._parse_function() 6230 if not constraint: 6231 break 6232 constraints.append(constraint) 6233 6234 return constraints 6235 6236 def _parse_unnamed_constraint( 6237 self, constraints: t.Optional[t.Collection[str]] = None 6238 ) -> t.Optional[exp.Expression]: 6239 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6240 constraints or self.CONSTRAINT_PARSERS 6241 ): 6242 return None 6243 6244 constraint = self._prev.text.upper() 6245 if constraint not in self.CONSTRAINT_PARSERS: 6246 self.raise_error(f"No parser found for schema constraint {constraint}.") 6247 6248 return self.CONSTRAINT_PARSERS[constraint](self) 6249 6250 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6251 return self._parse_id_var(any_token=False) 6252 6253 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6254 self._match_texts(("KEY", "INDEX")) 6255 return self.expression( 6256 exp.UniqueColumnConstraint, 6257 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6258 this=self._parse_schema(self._parse_unique_key()), 6259 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6260 on_conflict=self._parse_on_conflict(), 6261 options=self._parse_key_constraint_options(), 6262 ) 6263 6264 def _parse_key_constraint_options(self) -> t.List[str]: 6265 options = [] 6266 while True: 6267 if not self._curr: 6268 break 6269 6270 if self._match(TokenType.ON): 6271 action = None 6272 on = self._advance_any() and self._prev.text 6273 6274 if self._match_text_seq("NO", "ACTION"): 6275 action = "NO ACTION" 6276 elif self._match_text_seq("CASCADE"): 6277 action = "CASCADE" 6278 elif self._match_text_seq("RESTRICT"): 6279 action = "RESTRICT" 6280 elif self._match_pair(TokenType.SET, TokenType.NULL): 6281 action = "SET NULL" 6282 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6283 action = "SET DEFAULT" 6284 else: 6285 self.raise_error("Invalid key constraint") 6286 6287 options.append(f"ON {on} {action}") 6288 else: 6289 var = self._parse_var_from_options( 6290 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6291 ) 6292 if not var: 6293 break 6294 options.append(var.name) 6295 6296 return options 6297 6298 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6299 if match and not self._match(TokenType.REFERENCES): 6300 return None 6301 6302 expressions = None 6303 this = self._parse_table(schema=True) 6304 options = self._parse_key_constraint_options() 6305 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6306 6307 def _parse_foreign_key(self) -> exp.ForeignKey: 6308 expressions = ( 6309 self._parse_wrapped_id_vars() 6310 if not self._match(TokenType.REFERENCES, advance=False) 6311 else None 6312 ) 6313 reference = self._parse_references() 6314 on_options = {} 6315 6316 while self._match(TokenType.ON): 6317 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6318 self.raise_error("Expected DELETE or UPDATE") 6319 6320 kind = self._prev.text.lower() 6321 6322 if self._match_text_seq("NO", "ACTION"): 6323 action = "NO ACTION" 6324 elif self._match(TokenType.SET): 6325 
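# Consume the NULL or DEFAULT token so the action below renders as
# "SET NULL" / "SET DEFAULT"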
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6326 action = "SET " + self._prev.text.upper() 6327 else: 6328 self._advance() 6329 action = self._prev.text.upper() 6330 6331 on_options[kind] = action 6332 6333 return self.expression( 6334 exp.ForeignKey, 6335 expressions=expressions, 6336 reference=reference, 6337 options=self._parse_key_constraint_options(), 6338 **on_options, # type: ignore 6339 ) 6340 6341 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6342 return self._parse_ordered() or self._parse_field() 6343 6344 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6345 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6346 self._retreat(self._index - 1) 6347 return None 6348 6349 id_vars = self._parse_wrapped_id_vars() 6350 return self.expression( 6351 exp.PeriodForSystemTimeConstraint, 6352 this=seq_get(id_vars, 0), 6353 expression=seq_get(id_vars, 1), 6354 ) 6355 6356 def _parse_primary_key( 6357 self, wrapped_optional: bool = False, in_props: bool = False 6358 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6359 desc = ( 6360 self._match_set((TokenType.ASC, TokenType.DESC)) 6361 and self._prev.token_type == TokenType.DESC 6362 ) 6363 6364 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6365 return self.expression( 6366 exp.PrimaryKeyColumnConstraint, 6367 desc=desc, 6368 options=self._parse_key_constraint_options(), 6369 ) 6370 6371 expressions = self._parse_wrapped_csv( 6372 self._parse_primary_key_part, optional=wrapped_optional 6373 ) 6374 6375 return self.expression( 6376 exp.PrimaryKey, 6377 expressions=expressions, 6378 include=self._parse_index_params(), 6379 options=self._parse_key_constraint_options(), 6380 ) 6381 6382 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6383 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6384 6385 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6386 """ 6387 Parses a datetime column in ODBC format. We parse the column into the corresponding 6388 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6389 same as we did for `DATE('yyyy-mm-dd')`. 
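The other ODBC escape prefixes are handled analogously: `{t'hh:mm:ss'}` and
`{ts'yyyy-mm-dd hh:mm:ss'}` resolve through the ODBC_DATETIME_LITERALS mapping
to the corresponding time and timestamp expressions.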
6390 6391 Reference: 6392 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6393 """ 6394 self._match(TokenType.VAR) 6395 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6396 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6397 if not self._match(TokenType.R_BRACE): 6398 self.raise_error("Expected }") 6399 return expression 6400 6401 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6402 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6403 return this 6404 6405 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6406 map_token = seq_get(self._tokens, self._index - 2) 6407 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6408 else: 6409 parse_map = False 6410 6411 bracket_kind = self._prev.token_type 6412 if ( 6413 bracket_kind == TokenType.L_BRACE 6414 and self._curr 6415 and self._curr.token_type == TokenType.VAR 6416 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6417 ): 6418 return self._parse_odbc_datetime_literal() 6419 6420 expressions = self._parse_csv( 6421 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6422 ) 6423 6424 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6425 self.raise_error("Expected ]") 6426 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6427 self.raise_error("Expected }") 6428 6429 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6430 if bracket_kind == TokenType.L_BRACE: 6431 this = self.expression( 6432 exp.Struct, 6433 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6434 ) 6435 elif not this: 6436 this = build_array_constructor( 6437 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6438 ) 6439 else: 6440 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6441 if constructor_type: 6442 return build_array_constructor( 6443 constructor_type, 6444 args=expressions, 6445 bracket_kind=bracket_kind, 6446 dialect=self.dialect, 6447 ) 6448 6449 expressions = apply_index_offset( 6450 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6451 ) 6452 this = self.expression( 6453 exp.Bracket, 6454 this=this, 6455 expressions=expressions, 6456 comments=this.pop_comments(), 6457 ) 6458 6459 self._add_comments(this) 6460 return self._parse_bracket(this) 6461 6462 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6463 if self._match(TokenType.COLON): 6464 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6465 return this 6466 6467 def _parse_case(self) -> t.Optional[exp.Expression]: 6468 ifs = [] 6469 default = None 6470 6471 comments = self._prev_comments 6472 expression = self._parse_assignment() 6473 6474 while self._match(TokenType.WHEN): 6475 this = self._parse_assignment() 6476 self._match(TokenType.THEN) 6477 then = self._parse_assignment() 6478 ifs.append(self.expression(exp.If, this=this, true=then)) 6479 6480 if self._match(TokenType.ELSE): 6481 default = self._parse_assignment() 6482 6483 if not self._match(TokenType.END): 6484 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6485 default = exp.column("interval") 6486 else: 6487 self.raise_error("Expected END after CASE", self._prev) 6488 6489 return self.expression( 6490 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6491 ) 6492 6493 def _parse_if(self) -> t.Optional[exp.Expression]: 6494 if self._match(TokenType.L_PAREN): 6495 args = self._parse_csv( 6496 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6497 ) 6498 this = self.validate_expression(exp.If.from_arg_list(args), args) 6499 self._match_r_paren() 6500 else: 6501 index = self._index - 1 6502 6503 if self.NO_PAREN_IF_COMMANDS and index == 0: 6504 return self._parse_as_command(self._prev) 6505 6506 condition = self._parse_assignment() 6507 6508 if not condition: 6509 self._retreat(index) 6510 return None 6511 6512 self._match(TokenType.THEN) 6513 true = self._parse_assignment() 6514 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6515 self._match(TokenType.END) 6516 this = self.expression(exp.If, this=condition, true=true, false=false) 6517 6518 return this 6519 6520 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6521 if not self._match_text_seq("VALUE", "FOR"): 6522 self._retreat(self._index - 1) 6523 return None 6524 6525 return self.expression( 6526 exp.NextValueFor, 6527 this=self._parse_column(), 6528 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6529 ) 6530 6531 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6532 this = self._parse_function() or self._parse_var_or_string(upper=True) 6533 6534 if self._match(TokenType.FROM): 6535 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6536 6537 if not self._match(TokenType.COMMA): 6538 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6539 6540 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6541 6542 def _parse_gap_fill(self) -> exp.GapFill: 6543 self._match(TokenType.TABLE) 6544 this = self._parse_table() 6545 6546 self._match(TokenType.COMMA) 6547 args = [this, *self._parse_csv(self._parse_lambda)] 6548 6549 gap_fill = exp.GapFill.from_arg_list(args) 6550 return self.validate_expression(gap_fill, args) 6551 6552 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6553 this = self._parse_assignment() 6554 6555 if not self._match(TokenType.ALIAS): 6556 if self._match(TokenType.COMMA): 6557 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6558 6559 self.raise_error("Expected AS after CAST") 6560 6561 fmt = None 6562 to = self._parse_types() 6563 6564 default = self._match(TokenType.DEFAULT) 6565 if default: 6566 default = self._parse_bitwise() 6567 self._match_text_seq("ON", "CONVERSION", "ERROR") 6568 6569 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6570 fmt_string = self._parse_string() 6571 fmt = self._parse_at_time_zone(fmt_string) 6572 6573 if not to: 6574 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6575 if to.this in exp.DataType.TEMPORAL_TYPES: 6576 this = self.expression( 6577 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6578 this=this, 6579 format=exp.Literal.string( 6580 format_time( 6581 fmt_string.this if fmt_string else "", 6582 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6583 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6584 ) 6585 ), 6586 safe=safe, 6587 ) 6588 6589 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6590 this.set("zone", fmt.args["zone"]) 6591 return this 6592 elif not to: 6593 self.raise_error("Expected TYPE after CAST") 6594 elif isinstance(to, exp.Identifier): 6595 to = exp.DataType.build(to.name, 
dialect=self.dialect, udt=True) 6596 elif to.this == exp.DataType.Type.CHAR: 6597 if self._match(TokenType.CHARACTER_SET): 6598 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6599 6600 return self.build_cast( 6601 strict=strict, 6602 this=this, 6603 to=to, 6604 format=fmt, 6605 safe=safe, 6606 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6607 default=default, 6608 ) 6609 6610 def _parse_string_agg(self) -> exp.GroupConcat: 6611 if self._match(TokenType.DISTINCT): 6612 args: t.List[t.Optional[exp.Expression]] = [ 6613 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6614 ] 6615 if self._match(TokenType.COMMA): 6616 args.extend(self._parse_csv(self._parse_assignment)) 6617 else: 6618 args = self._parse_csv(self._parse_assignment) # type: ignore 6619 6620 if self._match_text_seq("ON", "OVERFLOW"): 6621 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6622 if self._match_text_seq("ERROR"): 6623 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6624 else: 6625 self._match_text_seq("TRUNCATE") 6626 on_overflow = self.expression( 6627 exp.OverflowTruncateBehavior, 6628 this=self._parse_string(), 6629 with_count=( 6630 self._match_text_seq("WITH", "COUNT") 6631 or not self._match_text_seq("WITHOUT", "COUNT") 6632 ), 6633 ) 6634 else: 6635 on_overflow = None 6636 6637 index = self._index 6638 if not self._match(TokenType.R_PAREN) and args: 6639 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6640 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6641 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6642 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6643 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6644 6645 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6646 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6647 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
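# For instance, Postgres' STRING_AGG(x, ',' ORDER BY x) and Trino's
# LISTAGG(x, ',') WITHIN GROUP (ORDER BY x) both end up as exp.GroupConcat, the
# same node produced for MySQL's GROUP_CONCAT, which keeps transpilation simple.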

    def _parse_convert(
        self, strict: bool, safe: t.Optional[bool] = None
    ) -> t.Optional[exp.Expression]:
        this = self._parse_bitwise()

        if self._match(TokenType.USING):
            to: t.Optional[exp.Expression] = self.expression(
                exp.CharacterSet, this=self._parse_var()
            )
        elif self._match(TokenType.COMMA):
            to = self._parse_types()
        else:
            to = None

        return self.build_cast(strict=strict, this=this, to=to, safe=safe)

    def _parse_xml_table(self) -> exp.XMLTable:
        namespaces = None
        passing = None
        columns = None

        if self._match_text_seq("XMLNAMESPACES", "("):
            namespaces = self._parse_xml_namespace()
            self._match_text_seq(")", ",")

        this = self._parse_string()

        if self._match_text_seq("PASSING"):
            # The BY VALUE keywords are optional and are provided for semantic clarity
            self._match_text_seq("BY", "VALUE")
            passing = self._parse_csv(self._parse_column)

        by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF")

        if self._match_text_seq("COLUMNS"):
            columns = self._parse_csv(self._parse_field_def)

        return self.expression(
            exp.XMLTable,
            this=this,
            namespaces=namespaces,
            passing=passing,
            columns=columns,
            by_ref=by_ref,
        )

    def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]:
        namespaces = []

        while True:
            if self._match(TokenType.DEFAULT):
                uri = self._parse_string()
            else:
                uri = self._parse_alias(self._parse_string())
            namespaces.append(self.expression(exp.XMLNamespace, this=uri))
            if not self._match(TokenType.COMMA):
                break

        return namespaces

    def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]:
        args = self._parse_csv(self._parse_assignment)

        if len(args) < 3:
            return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1))

        return self.expression(exp.DecodeCase, expressions=args)

    def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]:
        self._match_text_seq("KEY")
        key = self._parse_column()
        self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS)
        self._match_text_seq("VALUE")
        value = self._parse_bitwise()

        if not key and not value:
            return None
        return self.expression(exp.JSONKeyValue, this=key, expression=value)

    def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not this or not self._match_text_seq("FORMAT", "JSON"):
            return this

        return self.expression(exp.FormatJson, this=this)

    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )

    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )
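
    # Illustrative usage (added commentary, not part of the original source): the
    # ON ERROR / ON EMPTY handling parsed above surfaces in the JSON accessors, e.g.
    # assuming the Oracle dialect:
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("SELECT JSON_VALUE(j, '$.x' NULL ON EMPTY) FROM t", read="oracle")
    #   # the NULL ON EMPTY clause should be captured as an exp.OnCondition argument
    #   # on the resulting function expression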

    def _parse_match_against(self) -> exp.MatchAgainst:
        if self._match_text_seq("TABLE"):
            # parse SingleStore MATCH(TABLE ...) syntax
            # https://docs.singlestore.com/cloud/reference/sql-reference/full-text-search-functions/match/
            expressions = []
            table = self._parse_table()
            if table:
                expressions = [table]
        else:
            expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)

    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)
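
    # Illustrative usage (added commentary, not part of the original source): both
    # POSITION call styles normalize to the same node, so these should produce
    # equivalent trees:
    #
    #   import sqlglot
    #   a = sqlglot.parse_one("SELECT POSITION('a' IN s)")
    #   b = sqlglot.parse_one("SELECT POSITION('a', s)")
    #   # both should contain exp.StrPosition(this=s, substr='a')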

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this

    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
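
    # Illustrative usage (added commentary, not part of the original source): a full
    # OVER clause ends up as an exp.Window wrapping the function, with the partition,
    # order and frame captured, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one(
    #       "SELECT SUM(x) OVER (PARTITION BY g ORDER BY y ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) FROM t"
    #   )
    #   win = ast.find(sqlglot.exp.Window)
    #   # win.args["spec"] should be an exp.WindowSpec with kind="ROWS"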

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
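
    # Note (added commentary): _parse_csv and _parse_tokens are the two generic
    # combinators used throughout this module. _parse_csv collects a separator-
    # delimited list of sub-expressions, while _parse_tokens left-folds a map of
    # operator tokens into a nested binary tree, so a chain like "a + b + c" becomes
    # exp.Add(exp.Add(a, b), c).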

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return (
            self._parse_set_operations(
                self._parse_alias(self._parse_assignment(), explicit=True)
                if alias
                else self._parse_assignment()
            )
            or self._parse_select()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR) or self._match(TokenType.NOT):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression
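
    # Illustrative usage (added commentary, not part of the original source): ADD COLUMN
    # with a position modifier, as in MySQL/Databricks, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT AFTER b", read="mysql")
    #   # the new exp.ColumnDef should carry an exp.ColumnPosition in its "position" arg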

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
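
    # Illustrative usage (added commentary, not part of the original source): the
    # ALTER COLUMN fallthrough above handles the common SET DATA TYPE form, e.g.
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT")
    #   # should yield exp.Alter with an exp.AlterColumn action whose "dtype" is TEXT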

    def _parse_alter_diststyle(self) -> exp.AlterDistStyle:
        if self._match_texts(("ALL", "EVEN", "AUTO")):
            return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper()))

        self._match_text_seq("KEY", "DISTKEY")
        return self.expression(exp.AlterDistStyle, this=self._parse_column())

    def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey:
        if compound:
            self._match_text_seq("SORTKEY")

        if self._match(TokenType.L_PAREN, advance=False):
            return self.expression(
                exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound
            )

        self._match_texts(("AUTO", "NONE"))
        return self.expression(
            exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound
        )

    def _parse_alter_table_drop(self) -> t.List[exp.Expression]:
        index = self._index - 1

        partition_exists = self._parse_exists()
        if self._match(TokenType.PARTITION, advance=False):
            return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists))

        self._retreat(index)
        return self._parse_csv(self._parse_drop_column)

    def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]:
        if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN:
            exists = self._parse_exists()
            old_column = self._parse_column()
            to = self._match_text_seq("TO")
            new_column = self._parse_column()

            if old_column is None or to is None or new_column is None:
                return None

            return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists)

        self._match_text_seq("TO")
        return self.expression(exp.AlterRename, this=self._parse_table(schema=True))

    def _parse_alter_table_set(self) -> exp.AlterSet:
        alter_set = self.expression(exp.AlterSet)

        if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq(
            "TABLE", "PROPERTIES"
        ):
            alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment))
        elif self._match_text_seq("FILESTREAM_ON", advance=False):
            alter_set.set("expressions", [self._parse_assignment()])
        elif self._match_texts(("LOGGED", "UNLOGGED")):
            alter_set.set("option", exp.var(self._prev.text.upper()))
        elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")):
            alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}"))
        elif self._match_text_seq("LOCATION"):
            alter_set.set("location", self._parse_field())
        elif self._match_text_seq("ACCESS", "METHOD"):
            alter_set.set("access_method", self._parse_field())
        elif self._match_text_seq("TABLESPACE"):
            alter_set.set("tablespace", self._parse_field())
        elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"):
            alter_set.set("file_format", [self._parse_field()])
        elif self._match_text_seq("STAGE_FILE_FORMAT"):
            alter_set.set("file_format", self._parse_wrapped_options())
        elif self._match_text_seq("STAGE_COPY_OPTIONS"):
            alter_set.set("copy_options", self._parse_wrapped_options())
        elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"):
            alter_set.set("tag", self._parse_csv(self._parse_assignment))
        else:
            if self._match_text_seq("SERDE"):
                alter_set.set("serde", self._parse_field())

            properties = self._parse_wrapped(self._parse_properties, optional=True)
            alter_set.set("expressions", [properties])

        return alter_set

    def _parse_alter_session(self) -> exp.AlterSession:
        """Parse ALTER SESSION SET/UNSET statements."""
        if self._match(TokenType.SET):
            expressions = self._parse_csv(lambda: self._parse_set_item_assignment())
            return self.expression(exp.AlterSession, expressions=expressions, unset=False)

        self._match_text_seq("UNSET")
        expressions = self._parse_csv(
            lambda: self.expression(exp.SetItem, this=self._parse_id_var(any_token=True))
        )
        return self.expression(exp.AlterSession, expressions=expressions, unset=True)

    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")

        if alter_token.token_type == TokenType.SESSION:
            this = None
            check = None
            cluster = None
        else:
            this = self._parse_table(schema=True)
            check = self._match_text_seq("WITH", "CHECK")
            cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                    check=check,
                )

        return self._parse_as_command(start)
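
    # Note (added commentary): _parse_alter is deliberately conservative. If any
    # tokens remain unconsumed after the registered ALTER_PARSERS action runs, the
    # whole statement is re-parsed as an opaque exp.Command, so unsupported ALTER
    # variants still round-trip as raw SQL instead of failing.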

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expr keywords before fallback to parse table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
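
    # Illustrative usage (added commentary, not part of the original source):
    #
    #   import sqlglot
    #   ast = sqlglot.parse_one("""
    #       MERGE INTO t USING s ON t.id = s.id
    #       WHEN MATCHED THEN UPDATE SET t.v = s.v
    #       WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)
    #   """)
    #   # ast is an exp.Merge whose "whens" arg should hold one exp.When per branch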

    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])
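
    # Note (added commentary): _parse_as_command is the global escape hatch used by
    # many parsers above. It consumes every remaining token and stores the statement
    # verbatim in an exp.Command (leading keyword in "this", the rest in
    # "expression"), emitting an unsupported-syntax warning instead of a hard error.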

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None

    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
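
    # Note (added commentary): the token helpers above follow one convention: _match*
    # methods return a truthy value and advance on success, or return None and leave
    # the cursor where it was (retreating if they consumed lookahead). That contract
    # is what makes the speculative index/_retreat pattern used throughout this file
    # safe to nest.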

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege) or ON
        # (end of privilege list) or L_PAREN (start of column list) are met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)

    def _parse_grant_revoke_common(
        self,
    ) -> t.Tuple[t.Optional[t.List], t.Optional[str], t.Optional[exp.Expression]]:
        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        return privileges, kind, securable

    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_revoke(self) -> exp.Revoke | exp.Command:
        start = self._prev

        grant_option = self._match_text_seq("GRANT", "OPTION", "FOR")

        privileges, kind, securable = self._parse_grant_revoke_common()

        if not securable or not self._match_text_seq("FROM"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        cascade = None
        if self._match_texts(("CASCADE", "RESTRICT")):
            cascade = self._prev.text.upper()

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Revoke,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
            cascade=cascade,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
query.args.get("with") 8571 ctes = with_.pop() if with_ else None 8572 8573 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8574 if ctes: 8575 new_select.set("with", ctes) 8576 8577 return new_select.with_(new_cte, as_=query, copy=False) 8578 8579 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8580 select = self._parse_select(consume_pipe=False) 8581 if not select: 8582 return query 8583 8584 return self._build_pipe_cte( 8585 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8586 ) 8587 8588 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8589 limit = self._parse_limit() 8590 offset = self._parse_offset() 8591 if limit: 8592 curr_limit = query.args.get("limit", limit) 8593 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8594 query.limit(limit, copy=False) 8595 if offset: 8596 curr_offset = query.args.get("offset") 8597 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 8598 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8599 8600 return query 8601 8602 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8603 this = self._parse_assignment() 8604 if self._match_text_seq("GROUP", "AND", advance=False): 8605 return this 8606 8607 this = self._parse_alias(this) 8608 8609 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8610 return self._parse_ordered(lambda: this) 8611 8612 return this 8613 8614 def _parse_pipe_syntax_aggregate_group_order_by( 8615 self, query: exp.Select, group_by_exists: bool = True 8616 ) -> exp.Select: 8617 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8618 aggregates_or_groups, orders = [], [] 8619 for element in expr: 8620 if isinstance(element, exp.Ordered): 8621 this = element.this 8622 if isinstance(this, exp.Alias): 8623 element.set("this", this.args["alias"]) 8624 orders.append(element) 8625 else: 8626 this = element 8627 aggregates_or_groups.append(this) 8628 8629 if group_by_exists: 8630 query.select(*aggregates_or_groups, copy=False).group_by( 8631 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8632 copy=False, 8633 ) 8634 else: 8635 query.select(*aggregates_or_groups, append=False, copy=False) 8636 8637 if orders: 8638 return query.order_by(*orders, append=False, copy=False) 8639 8640 return query 8641 8642 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8643 self._match_text_seq("AGGREGATE") 8644 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8645 8646 if self._match(TokenType.GROUP_BY) or ( 8647 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8648 ): 8649 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8650 8651 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8652 8653 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8654 first_setop = self.parse_set_operation(this=query) 8655 if not first_setop: 8656 return None 8657 8658 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8659 expr = self._parse_paren() 8660 return expr.assert_is(exp.Subquery).unnest() if expr else None 8661 8662 first_setop.this.pop() 8663 8664 setops = [ 8665 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8666 *self._parse_csv(_parse_and_unwrap_query), 8667 ] 8668 8669 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8670 
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)

    def _parse_json_value(self) -> exp.JSONValue:
        this = self._parse_bitwise()
        self._match(TokenType.COMMA)
        path = self._parse_bitwise()

        returning = self._match(TokenType.RETURNING) and self._parse_type()

        return self.expression(
            exp.JSONValue,
            this=this,
            path=self.dialect.to_json_path(path),
            returning=returning,
            on_condition=self._parse_on_condition(),
        )

    def _parse_group_concat(self) -> t.Optional[exp.Expression]:
        def concat_exprs(
            node: t.Optional[exp.Expression], exprs: t.List[exp.Expression]
        ) -> exp.Expression:
            if isinstance(node, exp.Distinct) and len(node.expressions) > 1:
                concat_exprs = [
                    self.expression(exp.Concat, expressions=node.expressions, safe=True)
                ]
                node.set("expressions", concat_exprs)
                return node
            if len(exprs) == 1:
                return exprs[0]
            return self.expression(exp.Concat, expressions=args, safe=True)

        args = self._parse_csv(self._parse_lambda)

        if args:
            order = args[-1] if isinstance(args[-1], exp.Order) else None

            if order:
                # Order By is the last (or only) expression in the list and has consumed the
                # 'expr' before it, remove 'expr' from exp.Order and add it back to args
                args[-1] = order.this
                order.set("this", concat_exprs(order.this, args))

            this = order or concat_exprs(args[0], args)
        else:
            this = None

        separator = self._parse_field() if self._match(TokenType.SEPARATOR) else None

        return self.expression(exp.GroupConcat, this=this, separator=separator)
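The GROUP_CONCAT handling above is easiest to observe through the public API rather than by calling the private rule directly. A minimal sketch, assuming the MySQL dialect (the SQL string is illustrative):

import sqlglot
from sqlglot import exp

# _parse_group_concat folds the trailing ORDER BY into the aggregate and
# captures the SEPARATOR as the `separator` arg of exp.GroupConcat.
tree = sqlglot.parse_one(
    "SELECT GROUP_CONCAT(DISTINCT name ORDER BY name SEPARATOR '; ') FROM t",
    read="mysql",
)
print(tree.find(exp.GroupConcat))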
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
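For example, a parser that accumulates up to five errors before raising, bound to a specific dialect (the dialect name is illustrative):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

# ErrorLevel.RAISE collects errors and raises them together in check_errors
parser = Parser(
    error_level=ErrorLevel.RAISE,
    error_message_context=80,
    max_errors=5,
    dialect="duckdb",
)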
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
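A minimal usage sketch, tokenizing with the default Tokenizer (the statements are illustrative):

from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "SELECT a FROM t; SELECT b FROM u"
tokens = Tokenizer().tokenize(sql)
trees = Parser().parse(tokens, sql=sql)
assert len(trees) == 2  # one syntax tree per statement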
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
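For instance, parsing a token stream as a condition rather than a full statement, assuming exp.Condition is registered in EXPRESSION_PARSERS (it is in the default parser):

from sqlglot import exp
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

sql = "x > 1 AND y < 2"
tokens = Tokenizer().tokenize(sql)
condition = Parser().parse_into(exp.Condition, tokens, sql=sql)[0]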
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
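Under ErrorLevel.WARN, for example, errors collected during parsing are only logged (the invalid SQL below is illustrative and assumed to trigger a parse error):

from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser
from sqlglot.tokens import Tokenizer

parser = Parser(error_level=ErrorLevel.WARN)
# check_errors runs at the end of parse(); with WARN it logs instead of raising
parser.parse(Tokenizer().tokenize("SELECT 1 +"))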
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error in the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f" {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error in the list of recorded errors or raises it, depending on the chosen error level setting.
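Dialect-specific rules typically call this when a required token is missing; a hypothetical sketch (the subclass, method, and keyword are invented for illustration):

from sqlglot.parser import Parser

class MyParser(Parser):
    def _parse_widget(self):
        # Hypothetical rule: report a position-aware error if the keyword is absent
        if not self._match_text_seq("WIDGET"):
            self.raise_error("Expecting WIDGET")
        return self._parse_id_var()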
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
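A small sketch of direct use outside a parsing rule (the node choice is illustrative):

from sqlglot import exp
from sqlglot.parser import Parser

parser = Parser()
# Builds exp.Paren(this=Column(x)) and validates its mandatory `this` arg
paren = parser.expression(exp.Paren, this=exp.column("x"))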
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
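For example, a node missing a mandatory argument is routed through raise_error; with ErrorLevel.RAISE the error is recorded and surfaces later via check_errors (the expression is illustrative):

from sqlglot import exp
from sqlglot.errors import ErrorLevel
from sqlglot.parser import Parser

parser = Parser(error_level=ErrorLevel.RAISE)
parser.validate_expression(exp.Like(this=exp.column("a")))  # `expression` arg is missing
print(parser.errors)   # one recorded error for the missing argument
parser.check_errors()  # raises ParseError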
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
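This rule is exercised whenever a query contains UNION, EXCEPT or INTERSECT; a quick check through the public API:

import sqlglot
from sqlglot import exp

tree = sqlglot.parse_one("SELECT a FROM t UNION ALL SELECT a FROM u")
assert isinstance(tree, exp.Union)
assert tree.args.get("distinct") is False  # ALL was matched above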